From 02e4d4bd1bb26744c6424b4c61f9504045f04d74 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 26 Nov 2022 14:12:13 +0100 Subject: [PATCH] Optimize indexing in loops --- oscar64/InterCode.cpp | 129 ++++++++++++++++++++++++++++++-- oscar64/InterCode.h | 6 ++ oscar64/NativeCodeGenerator.cpp | 16 ++-- 3 files changed, 138 insertions(+), 13 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 5b12859..a1dea99 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -2446,6 +2446,13 @@ InterOperand::InterOperand(void) : mTemp(INVALID_TEMPORARY), mType(IT_NONE), mFinal(false), mIntConst(0), mFloatConst(0), mVarIndex(-1), mOperandSize(0), mLinkerObject(nullptr), mMemory(IM_NONE), mStride(1) {} +bool InterOperand::IsNotUByte(void) const +{ + return + mRange.mMinState == IntegerValueRange::S_BOUND && mRange.mMinValue < 0 || + mRange.mMaxState == IntegerValueRange::S_BOUND && mRange.mMaxValue >= 256; +} + bool InterOperand::IsUByte(void) const { return @@ -5869,7 +5876,13 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray case IA_OR: case IA_XOR: - if (ins->mSrc[0].mTemp < 0) + if (ins->mSrc[0].IsUnsigned() && ins->mSrc[1].IsUnsigned()) + { + vr.mMaxState = vr.mMinState = IntegerValueRange::S_BOUND; + vr.mMaxValue = BuildLowerBitsMask(ins->mSrc[1].mRange.mMaxValue) | BuildLowerBitsMask(ins->mSrc[0].mRange.mMaxValue); + vr.mMinValue = 0; + } + else if (ins->mSrc[0].mTemp < 0) { vr = mLocalValueRange[ins->mSrc[1].mTemp]; int64 v = vr.mMaxValue; @@ -8461,7 +8474,7 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& } else if (ins->mCode == IC_COPY || ins->mCode == IC_STRCPY) flushMem = true; - else if (ins->mCode == IC_LEA || ins->mCode == IC_UNARY_OPERATOR || ins->mCode == IC_BINARY_OPERATOR || ins->mCode == IC_RELATIONAL_OPERATOR) + else if (ins->mCode == IC_LEA || ins->mCode == IC_UNARY_OPERATOR || ins->mCode == IC_BINARY_OPERATOR || ins->mCode == IC_RELATIONAL_OPERATOR || ins->mCode == IC_CONVERSION_OPERATOR) { int j = 0; while (j < mLoadStoreInstructions.Size() && !SameInstruction(ins, mLoadStoreInstructions[j])) @@ -8758,6 +8771,40 @@ void InterCodeBasicBlock::MarkRelevantStatics(void) } } +bool InterCodeBasicBlock::IsTempModifiedInRange(int from, int to, int temp) +{ + for (int i = from; i < to; i++) + if (mInstructions[i]->mDst.mTemp == temp) + return true; + return false; +} + +bool InterCodeBasicBlock::IsTempUsedInRange(int from, int to, int temp) +{ + for (int i = from; i < to; i++) + { + InterInstruction* ins = mInstructions[i]; + for (int j = 0; j < ins->mNumOperands; j++) + if (ins->mSrc[j].mTemp == temp) + return true; + } + return false; +} + +bool InterCodeBasicBlock::IsTempReferencedInRange(int from, int to, int temp) +{ + for (int i = from; i < to; i++) + { + InterInstruction* ins = mInstructions[i]; + if (ins->mDst.mTemp == temp) + return true; + for (int j = 0; j < ins->mNumOperands; j++) + if (ins->mSrc[j].mTemp == temp) + return true; + } + return false; +} + bool InterCodeBasicBlock::CanMoveInstructionDown(int si, int ti) const { InterInstruction* ins = mInstructions[si]; @@ -10368,9 +10415,11 @@ bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps) { int nins = mInstructions.Size(); - InterCodeBasicBlock* pblock = mEntryBlocks[0]; + InterCodeBasicBlock* pblock = mEntryBlocks[0], *eblock = mFalseJump; if (pblock == this) pblock = mEntryBlocks[1]; + if (eblock == this) + eblock = mTrueJump; if (mInstructions[nins - 1]->mCode == IC_BRANCH && mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && @@ -10389,6 +10438,10 @@ bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps) GrowingArray tvalues(nullptr); tvalues.SetSize(mEntryRequiredTemps.Size() + 16); + int pi = pblock->mInstructions.Size() - 1; + while (pi >= 0 && pblock->mInstructions[pi]->mDst.mTemp != ains->mDst.mTemp) + pi--; + int i = 0; while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp) i++; @@ -10401,6 +10454,53 @@ bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps) { tvalues[lins->mDst.mTemp] = lins; } + else if (lins->mCode == IC_LEA && lins->mSrc[0].mTemp < 0 && lins->mSrc[0].mIntConst == ains->mSrc[0].mIntConst && lins->mSrc[1].mTemp == lins->mDst.mTemp && + pi >= 0 && pblock->mInstructions[pi]->mCode == IC_CONSTANT && ains->mSrc[1].IsUByte() && pblock->mInstructions[pi]->mConst.mIntConst == 0 && + !IsTempReferencedInRange(i + 1, mInstructions.Size(), lins->mDst.mTemp) && !IsTempModifiedInRange(0, i, lins->mDst.mTemp) && + !eblock->mEntryRequiredTemps[lins->mDst.mTemp]) + { + if (spareTemps + 2 >= mEntryRequiredTemps.Size() + 16) + return true; + + + InterInstruction* nins = new InterInstruction(lins->mLocation, IC_LEA); + InterInstruction* cins = nullptr; + nins->mSrc[1] = lins->mSrc[1]; + + if (ains->mDst.mType == IT_INT16) + nins->mSrc[0] = ains->mSrc[1]; + else + { + cins = new InterInstruction(lins->mLocation, IC_CONVERSION_OPERATOR); + cins->mOperator = IA_EXT8TO16U; + cins->mSrc[0] = ains->mSrc[1]; + cins->mDst.mMemory = IM_INDIRECT; + cins->mDst.mTemp = spareTemps++; + cins->mDst.mType = IT_INT16; + nins->mSrc[0] = cins->mDst; + } + + nins->mDst.mMemory = IM_INDIRECT; + nins->mDst.mTemp = spareTemps++; + nins->mDst.mType = IT_POINTER; + + for (int j = 0; j < i; j++) + { + InterInstruction* tins = mInstructions[j]; + for (int k = 0; k < tins->mNumOperands; k++) + { + if (tins->mSrc[k].mTemp == lins->mDst.mTemp) + tins->mSrc[k].mTemp = nins->mDst.mTemp; + } + } + + mInstructions.Remove(i); + mInstructions.Insert(0, nins); + if (cins) + mInstructions.Insert(0, cins); + + changed = true; + } else if (lins->mCode == IC_STORE && lins->mSrc[1].mTemp >= 0 && lins->mSrc[1].mIntConst >= 32 && tvalues[lins->mSrc[1].mTemp]) { if (spareTemps + 2 >= mEntryRequiredTemps.Size() + 16) @@ -10740,6 +10840,14 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_SHL && IsIntegerType(ins->mDst.mType) && ins->mSrc[0].mTemp < 0 && (dep[ins->mSrc[1].mTemp] == DEP_INDEX || dep[ins->mSrc[1].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED) || ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[0].mTemp < 0 || dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED) && dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED || ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED || + ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && + IsIntegerType(ins->mDst.mType) && + (ins->mSrc[0].mTemp >= 0 && ins->mSrc[0].IsNotUByte() && (dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED)) && + (dep[ins->mSrc[1].mTemp] == DEP_INDEX || dep[ins->mSrc[1].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED) || + ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && + IsIntegerType(ins->mDst.mType) && + (ins->mSrc[1].mTemp >= 0 && ins->mSrc[1].IsNotUByte() && (dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED)) && + (dep[ins->mSrc[0].mTemp] == DEP_INDEX || dep[ins->mSrc[0].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED) || ins->mCode == IC_LEA && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED ) { if (dep[ins->mDst.mTemp] != DEP_INDEX_DERIVED) @@ -10868,7 +10976,7 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa indexins.Push(ins); } - else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[0].mTemp < 0 || dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED) && dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED) + else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[0].mTemp < 0 || dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED) && (dep[ins->mSrc[1].mTemp] == DEP_INDEX || dep[ins->mSrc[1].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED)) { indexStep[ins->mDst.mTemp] = indexStep[ins->mSrc[1].mTemp]; indexBase[ins->mDst.mTemp] = 0; @@ -10885,7 +10993,7 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa indexins.Push(ains); } - else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED) + else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && (dep[ins->mSrc[0].mTemp] == DEP_INDEX || dep[ins->mSrc[0].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED)) { indexStep[ins->mDst.mTemp] = indexStep[ins->mSrc[0].mTemp]; indexBase[ins->mDst.mTemp] = 0; @@ -13587,9 +13695,18 @@ void InterCodeProcedure::Close(void) mEntryBlock->CollectEntryBlocks(nullptr); #endif -#if 1 BuildTraces(false); + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); +#if 1 + SingleBlockLoopPointerSplit(activeSet); + + MergeIndexedLoadStore(); +#endif + +#if 1 BuildLoopPrefix(); DisassembleDebug("added dominators"); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 8e5dbf5..6696952 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -270,6 +270,8 @@ public: bool IsSByte(void) const; bool IsUnsigned(void) const; + bool IsNotUByte(void) const; + void Disassemble(FILE* file); }; @@ -478,6 +480,10 @@ public: bool CanMoveInstructionDown(int si, int ti) const; bool MergeCommonPathInstructions(void); + bool IsTempModifiedInRange(int from, int to, int temp); + bool IsTempUsedInRange(int from, int to, int temp); + bool IsTempReferencedInRange(int from, int to, int temp); + void CheckFinalLocal(void); void CheckFinal(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index a850fbe..2465b54 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -15541,7 +15541,7 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool // int nins = mIns.Size(), tins = mTrueJump->mIns.Size(), fins = mFalseJump->mIns.Size(); if (nins > 1 && tins > 0 && fins > 0 && mFalseJump->mIns[0].mType == ASMIT_TXA && - !mIns[nins - 1].ChangesAccu() && !mFalseJump->mEntryRequiredRegs[CPU_REG_A]) + mIns[nins - 2].mType == ASMIT_TAX && !mIns[nins - 1].ChangesAccu() && !mFalseJump->mEntryRequiredRegs[CPU_REG_A]) { mTrueJump->mIns.Push(NativeCodeInstruction(ASMIT_TXA)); mFalseJump->mIns[0].mType = ASMIT_NOP; mFalseJump->mIns[0].mMode = ASMIM_IMPLIED; @@ -30990,7 +30990,7 @@ void NativeCodeProcedure::RebuildEntry(void) void NativeCodeProcedure::Optimize(void) { - CheckFunc = !strcmp(mInterProc->mIdent->mString, "plants_animate"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "menu_draw_color_line"); #if 1 int step = 0; @@ -31028,6 +31028,7 @@ void NativeCodeProcedure::Optimize(void) RebuildEntry(); + #if 1 if (step > 3) { @@ -31061,7 +31062,6 @@ void NativeCodeProcedure::Optimize(void) mEntryBlock->ReplaceFinalZeroPageUse(this); } #endif - #if 1 do { @@ -31071,10 +31071,13 @@ void NativeCodeProcedure::Optimize(void) ResetVisited(); changed = mEntryBlock->RemoveUnusedResultInstructions(); + ResetVisited(); NativeRegisterDataSet data; if (mEntryBlock->ValueForwarding(data, step > 0, step == 7)) + { changed = true; + } else { #if 1 @@ -31146,7 +31149,6 @@ void NativeCodeProcedure::Optimize(void) changed = true; } #endif - #if 1 if (step > 0) { @@ -31192,7 +31194,6 @@ void NativeCodeProcedure::Optimize(void) } #endif - #if _DEBUG ResetVisited(); mEntryBlock->CheckBlocks(true); @@ -31212,6 +31213,8 @@ void NativeCodeProcedure::Optimize(void) } #endif + + #if _DEBUG ResetVisited(); mEntryBlock->CheckBlocks(true); @@ -31358,7 +31361,6 @@ void NativeCodeProcedure::Optimize(void) } #endif - #if _DEBUG ResetVisited(); mEntryBlock->CheckBlocks(); @@ -31374,7 +31376,6 @@ void NativeCodeProcedure::Optimize(void) changed = true; #endif - #if 1 if (step >= 4) { @@ -31487,6 +31488,7 @@ void NativeCodeProcedure::Optimize(void) else cnt++; + } while (changed); #if 1