From b9c477976a5b9fe40944837f1565c314a6db4586 Mon Sep 17 00:00:00 2001
From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com>
Date: Mon, 20 Dec 2021 20:44:10 +0100
Subject: [PATCH] Strength reduction for simple native code loops with y register

---
Review notes (everything between the "---" line above and the first
"diff --git" line is ignored by git am):

 * CanBypassStore: the address temp, offset and access size of the second
   instruction (bins) are now read from bins. The posted version read all
   three from sins and, in the IC_LOAD branch, overwrote st instead of
   setting bt, so the new IM_INDIRECT test compared sins with itself.
 * CanBypassStore: the IM_INDIRECT test now checks that the byte ranges
   [so, so + sz) and [bo, bo + bz) are disjoint; the posted version
   compared so + sz against the size bz instead of the offset bo.
 * Only existing '+' lines were edited, so all hunk line counts and the
   diffstat are unchanged. The post-image blob id on the InterCode.cpp
   "index" line still names the originally posted content.
 * The line structure of this patch was restored from a copy whose
   newlines and indentation had been collapsed. Hunk sizes were checked
   against the @@ headers, but indentation and spacing inside lines are
   a best guess; use "git apply --ignore-whitespace" if context lines do
   not match.
 * NOTE(review): in OptimizeSimpleLoopInvariant the STY write-back is
   added to lblock->mFalseJump with Push, which appears to append to the
   end of that block. Please confirm the exit block never reads the
   zero page location before that point.
 * NOTE(review): the same rewrite turns STA zp into TAY, which updates
   the N/Z flags where STA does not. Please confirm no instruction in
   the loop depends on flags across such a store.

 oscar64/InterCode.cpp           |  92 ++++++++++++++++++-
 oscar64/NativeCodeGenerator.cpp | 153 +++++++++++++++++++++++---------
 2 files changed, 200 insertions(+), 45 deletions(-)

diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp
index 5a8e2c3..7c483ed 100644
--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@@ -4425,6 +4425,32 @@ static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins
 	return true;
 }
 
+static bool CanBypassUp(const InterInstruction* lins, const InterInstruction* bins)
+{
+	if (lins->mDst.mTemp >= 0)
+	{
+		if (lins->mDst.mTemp == bins->mDst.mTemp)
+			return false;
+
+		for (int i = 0; i < bins->mNumOperands; i++)
+			if (lins->mDst.mTemp == bins->mSrc[i].mTemp)
+				return false;
+	}
+	if (bins->mDst.mTemp >= 0)
+	{
+		for (int i = 0; i < lins->mNumOperands; i++)
+			if (bins->mDst.mTemp == lins->mSrc[i].mTemp)
+				return false;
+	}
+	if (bins->mCode == IC_PUSH_FRAME || bins->mCode == IC_POP_FRAME)
+	{
+		if (lins->mCode == IC_CONSTANT && lins->mDst.mType == IT_POINTER && lins->mConst.mMemory == IM_FRAME)
+			return false;
+	}
+
+	return true;
+}
+
 static bool IsChained(const InterInstruction* ins, const InterInstruction* nins)
 {
 	if (ins->mDst.mTemp >= 0)
@@ -4443,27 +4469,39 @@ static bool CanBypassStore(const InterInstruction * sins, const InterInstruction
 		return false;
 
 	InterMemory sm = IM_NONE, bm = IM_NONE;
-	int bi = -1, si = -1;
+	int bi = -1, si = -1, bt = -1, st = -1, bo = 0, so = 0, bz = 1, sz = 1;
 	if (sins->mCode == IC_LOAD)
 	{
 		sm = sins->mSrc[0].mMemory;
 		si = sins->mSrc[0].mVarIndex;
+		st = sins->mSrc[0].mTemp;
+		so = sins->mSrc[0].mIntConst;
+		sz = InterTypeSize[sins->mDst.mType];
 	}
 	else if (sins->mCode == IC_LEA || sins->mCode == IC_STORE)
 	{
 		sm = sins->mSrc[1].mMemory;
 		si = sins->mSrc[1].mVarIndex;
+		st = sins->mSrc[1].mTemp;
+		so = sins->mSrc[1].mIntConst;
+		sz = InterTypeSize[sins->mSrc[0].mType];
 	}
 
 	if (bins->mCode == IC_LOAD)
 	{
 		bm = bins->mSrc[0].mMemory;
 		bi = bins->mSrc[0].mVarIndex;
+		bt = bins->mSrc[0].mTemp;
+		bo = bins->mSrc[0].mIntConst;
+		bz = InterTypeSize[bins->mDst.mType];
 	}
 	else if (bins->mCode == IC_LEA || bins->mCode == IC_STORE)
 	{
 		bm = bins->mSrc[1].mMemory;
 		bi = bins->mSrc[1].mVarIndex;
+		bt = bins->mSrc[1].mTemp;
+		bo = bins->mSrc[1].mIntConst;
+		bz = InterTypeSize[bins->mSrc[0].mType];
 	}
 
 	// Check ambiguity
@@ -4481,6 +4519,10 @@ static bool CanBypassStore(const InterInstruction * sins, const InterInstruction
 			else
 				return false;
 		}
+		else if (sm == IM_INDIRECT && bm == IM_INDIRECT && st == bt)
+		{
+			return so + sz <= bo || bo + bz <= so;	// same base temp: bypass only if the byte ranges are disjoint
+		}
 		else
 			return false;
 	}
@@ -4727,6 +4769,10 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
 				{
 					ins->mInvariant = false;
 				}
+				else if (ins->mSrc[0].mMemory == IM_LOCAL && hasCall)
+				{
+					ins->mInvariant = false;
+				}
 				else
 				{
 					for (int j = 0; j < mInstructions.Size(); j++)
@@ -4961,10 +5007,54 @@ void InterCodeBasicBlock::PeepholeOptimization(void)
 				if (i != j)
 					mInstructions[j] = ins;
 			}
+			else if (mInstructions[i]->mCode == IC_BINARY_OPERATOR && mInstructions[i]->mSrc[0].mTemp >= 0 && mInstructions[i]->mSrc[0].mFinal && mInstructions[i]->mSrc[1].mTemp >= 0 && mInstructions[i]->mSrc[1].mFinal)
+			{
+				InterInstruction* ins(mInstructions[i]);
+				int j = i;
+				while (j > 0 && CanBypassUp(ins, mInstructions[j - 1]))
+				{
+					mInstructions[j] = mInstructions[j - 1];
+					j--;
+				}
+				if (i != j)
+					mInstructions[j] = ins;
+			}
 
 			i++;
 		}
 
+		i = limit;
+		while (i >= 0)
+		{
+			// move non indirect loads down
+			if (mInstructions[i]->mCode == IC_LOAD && (mInstructions[i]->mSrc[0].mMemory != IM_INDIRECT || mInstructions[i]->mDst.mType != IT_INT8))
+			{
+				InterInstruction* ins(mInstructions[i]);
+				int j = i;
+				while (j < limit && CanBypassLoad(ins, mInstructions[j + 1]))
+				{
+					mInstructions[j] = mInstructions[j + 1];
+					j++;
+				}
+				if (i != j)
+					mInstructions[j] = ins;
+			}
+			else if (mInstructions[i]->mCode == IC_LEA && mInstructions[i]->mSrc[0].mTemp == -1)
+			{
+				InterInstruction* ins(mInstructions[i]);
+				int j = i;
+				while (j < limit && CanBypass(ins, mInstructions[j + 1]))
+				{
+					mInstructions[j] = mInstructions[j + 1];
+					j++;
+				}
+				if (i != j)
+					mInstructions[j] = ins;
+			}
+
+			i--;
+		}
+
 		bool changed;
 		do
 		{
diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp
index 096f798..fd7f48c 100644
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@@ -9290,26 +9290,29 @@ void NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
 		while (ai < lblock->mIns.Size() && !lblock->mIns[ai].ChangesAccu())
 			ai++;
 
-		if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
+		if (ai < lblock->mIns.Size())
 		{
-			int i = ai + 1;
-			while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu())
-				i++;
-			if (i == lblock->mIns.Size())
+			if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
 			{
-				mIns.Push(lblock->mIns[ai]);
-				lblock->mIns.Remove(ai);
+				int i = ai + 1;
+				while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu())
+					i++;
+				if (i == lblock->mIns.Size())
+				{
+					mIns.Push(lblock->mIns[ai]);
+					lblock->mIns.Remove(ai);
+				}
 			}
-		}
-		else if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
-		{
-			int i = ai + 1;
-			while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[0].mAddress))
-				i++;
-			if (i == lblock->mIns.Size())
+			else if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
 			{
-				mIns.Push(lblock->mIns[ai]);
-				lblock->mIns.Remove(ai);
+				int i = ai + 1;
+				while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[ai].mAddress))
+					i++;
+				if (i == lblock->mIns.Size())
+				{
+					mIns.Push(lblock->mIns[ai]);
+					lblock->mIns.Remove(ai);
+				}
 			}
 		}
 
@@ -9317,26 +9320,64 @@ void NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
 		while (ai < lblock->mIns.Size() && !lblock->mIns[ai].ChangesYReg())
 			ai++;
 
-		if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
+		if (ai < lblock->mIns.Size())
 		{
-			int i = ai + 1;
-			while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg())
-				i++;
-			if (i == lblock->mIns.Size())
+			if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
 			{
-				mIns.Push(lblock->mIns[ai]);
-				lblock->mIns.Remove(ai);
+				int i = ai + 1;
+				while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg())
+					i++;
+				if (i == lblock->mIns.Size())
+				{
+					mIns.Push(lblock->mIns[ai]);
+					lblock->mIns.Remove(ai);
+				}
 			}
-		}
-		else if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
-		{
-			int i = ai + 1;
-			while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[0].mAddress))
-				i++;
-			if (i == lblock->mIns.Size())
+			else if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
 			{
-				mIns.Push(lblock->mIns[ai]);
-				lblock->mIns.Remove(ai);
+				int i = 0;
+				while (i < lblock->mIns.Size() && (i == ai || !lblock->mIns[i].ChangesYReg()))
+					i++;
+				if (i == lblock->mIns.Size())
+				{
+					int addr = lblock->mIns[ai].mAddress;
+					i = 0;
+					while (i < lblock->mIns.Size() &&
+						(lblock->mIns[i].mMode != ASMIM_ZERO_PAGE || lblock->mIns[i].mAddress != addr ||
+						lblock->mIns[i].mType == ASMIT_LDA || lblock->mIns[i].mType == ASMIT_STA || lblock->mIns[i].mType == ASMIT_INC || lblock->mIns[i].mType == ASMIT_DEC || lblock->mIns[i].mType == ASMIT_LDY))
+						i++;
+					if (i == lblock->mIns.Size())
+					{
+						mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_ZERO_PAGE, addr));
+						lblock->mFalseJump->mIns.Push(NativeCodeInstruction(ASMIT_STY, ASMIM_ZERO_PAGE, addr));
+						for (int i = 0; i < lblock->mIns.Size(); i++)
+						{
+							if (lblock->mIns[i].mMode == ASMIM_ZERO_PAGE && lblock->mIns[i].mAddress == addr)
+							{
+								if (lblock->mIns[i].mType == ASMIT_LDA)
+								{
+									lblock->mIns[i].mType = ASMIT_TYA; lblock->mIns[i].mMode = ASMIM_IMPLIED;
+								}
+								else if (lblock->mIns[i].mType == ASMIT_STA)
+								{
+									lblock->mIns[i].mType = ASMIT_TAY; lblock->mIns[i].mMode = ASMIM_IMPLIED;
+								}
+								else if (lblock->mIns[i].mType == ASMIT_LDY)
+								{
+									lblock->mIns[i].mType = ASMIT_NOP; lblock->mIns[i].mMode = ASMIM_IMPLIED;
+								}
+								else if (lblock->mIns[i].mType == ASMIT_INC)
+								{
+									lblock->mIns[i].mType = ASMIT_INY; lblock->mIns[i].mMode = ASMIM_IMPLIED;
+								}
+								else if (lblock->mIns[i].mType == ASMIT_DEC)
+								{
+									lblock->mIns[i].mType = ASMIT_DEY; lblock->mIns[i].mMode = ASMIM_IMPLIED;
+								}
+							}
+						}
+					}
+				}
 			}
 		}
 	}
@@ -9356,9 +9397,13 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc)
 
 			if (lb->mIns[lbs-1].mType == ASMIT_CPX)
 			{
-				if (lb->mIns[lbs-2].mType == ASMIT_INX && mIns.Last().mType == ASMIT_LDX && mIns.Last().mMode == ASMIM_IMMEDIATE && mIns.Last().mAddress == 0)
+				int li = mIns.Size() - 1;
+				while (li >= 0 && !mIns[li].ChangesXReg())
+					li--;
+
+				if (li >= 0 && lb->mIns[lbs-2].mType == ASMIT_INX && mIns[li].mType == ASMIT_LDX && mIns[li].mMode == ASMIM_IMMEDIATE)
 				{
-					if (lb->mIns[lbs - 1].mMode == ASMIM_ZERO_PAGE)
+					if (lb->mIns[lbs - 1].mMode == ASMIM_ZERO_PAGE && mIns[li].mAddress == 0)
 					{
 						int a = lb->mIns[lbs - 1].mAddress;
 
@@ -9367,14 +9412,30 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc)
 							i++;
 						if (i + 2 == lbs)
 						{
-							mIns[mIns.Size() - 1].mMode = ASMIM_ZERO_PAGE;
-							mIns[mIns.Size() - 1].mAddress = a;
+							mIns[li].mMode = ASMIM_ZERO_PAGE;
+							mIns[li].mAddress = a;
+							lb->mIns[lbs - 2].mType = ASMIT_DEX;
+							lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED;
+							lb->mBranch = ASMIT_BNE;
+						}
+					}
+					else if (lb->mIns[lbs - 1].mMode == ASMIM_IMMEDIATE)
+					{
+						int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress;
+
+						int i = 0;
+						while (i + 2 < lbs && !lb->mIns[i].RequiresXReg())
+							i++;
+						if (i + 2 == lbs)
+						{
+							mIns[li].mAddress = a;
 							lb->mIns[lbs - 2].mType = ASMIT_DEX;
 							lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED;
 							lb->mBranch = ASMIT_BNE;
 						}
 					}
 				}
+
 			}
 			else if (lb->mIns[lbs - 1].mType == ASMIT_CPY)
 			{
@@ -12917,14 +12978,18 @@ void NativeCodeProcedure::Optimize(void)
 			changed = true;
 #endif
 #if 1
-		ResetVisited();
-		if (mEntryBlock->OptimizeSimpleLoop(this))
-			changed = true;
+
+		if (step > 0)
+		{
+			ResetVisited();
+			if (mEntryBlock->OptimizeSimpleLoop(this))
+				changed = true;
 
 
-		ResetVisited();
-		if (mEntryBlock->SimpleLoopReversal(this))
-			changed = true;
+			ResetVisited();
+			if (mEntryBlock->SimpleLoopReversal(this))
+				changed = true;
+		}
 
 		ResetVisited();
 		if (mEntryBlock->MergeBasicBlocks())
@@ -13194,7 +13259,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode
 					}
 					else if (i + 2 < iblock->mInstructions.Size() &&
 						InterTypeSize[ins->mDst.mType] >= 2 &&
-						iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] == 2 &&
+						iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] >= 2 &&
 						iblock->mInstructions[i + 1]->mDst.mTemp != ins->mDst.mTemp &&
 						iblock->mInstructions[i + 2]->mCode == IC_BINARY_OPERATOR &&
 						iblock->mInstructions[i + 2]->mSrc[0].mTemp == iblock->mInstructions[i + 1]->mDst.mTemp && iblock->mInstructions[i + 2]->mSrc[0].mFinal &&
@@ -13205,7 +13270,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode
 					}
 					else if (i + 2 < iblock->mInstructions.Size() &&
 						InterTypeSize[ins->mDst.mType] >= 2 &&
-						iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] == 2 &&
+						iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] >= 2 &&
 						iblock->mInstructions[i + 1]->mDst.mTemp != ins->mDst.mTemp &&
 						iblock->mInstructions[i + 2]->mCode == IC_BINARY_OPERATOR &&
 						iblock->mInstructions[i + 2]->mSrc[1].mTemp == iblock->mInstructions[i + 1]->mDst.mTemp && iblock->mInstructions[i + 2]->mSrc[1].mFinal &&