From 60868c71d3d72c3d10f1c6e8d2a317485f3d38e1 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Wed, 31 Jul 2024 21:11:13 +0200 Subject: [PATCH] Fix single tail loop opt for search loops (e.g. strlen) --- autotest/autotest.bat | 3 +++ autotest/strlen.c | 27 +++++++++++++++++++++++++++ oscar64/InterCode.cpp | 31 +++++++++++++++++++++++-------- oscar64/NativeCodeGenerator.cpp | 10 ++++++---- 4 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 autotest/strlen.c diff --git a/autotest/autotest.bat b/autotest/autotest.bat index 5787a75..6abe7fa 100644 --- a/autotest/autotest.bat +++ b/autotest/autotest.bat @@ -87,6 +87,9 @@ rem @echo off @call :test fastcalltest.c @if %errorlevel% neq 0 goto :error +@call :test strlen.c +@if %errorlevel% neq 0 goto :error + @call :test strcmptest.c @if %errorlevel% neq 0 goto :error diff --git a/autotest/strlen.c b/autotest/strlen.c new file mode 100644 index 0000000..41ee4dc --- /dev/null +++ b/autotest/strlen.c @@ -0,0 +1,27 @@ +#include <string.h> +#include <assert.h> + +char lstr[1025]; + +int main(void) +{ +#if 1 + assert(strlen("") == 0); + assert(strlen("1") == 1); + assert(strlen("12") == 2); + assert(strlen("123") == 3); + assert(strlen("1234") == 4); + assert(strlen("12345") == 5); + assert(strlen("123456") == 6); +#endif +#if 1 + char * dp = lstr; + for(int i=0; i<1024; i++) + { + *dp = 0; + assert(strlen(lstr) == i); + *dp++ = 'a'; + } +#endif + return 0; +} diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index eb5880b..6cb69de 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -5547,7 +5547,18 @@ void InterCodeBasicBlock::Append(InterInstruction * code) void InterCodeBasicBlock::AppendBeforeBranch(InterInstruction* code) { - mInstructions.Insert(mInstructions.Size() - 1, code); + int ti = mInstructions.Size() - 1; + if (mInstructions[ti]->mCode == IC_BRANCH) + { + if (ti > 0 && mInstructions[ti - 1]->mDst.mTemp == mInstructions[ti]->mSrc[0].mTemp &&
CanBypassUp(code, mInstructions[ti - 1])) + { + ti--; + if (ti > 0 && mInstructions[ti]->UsesTemp(mInstructions[ti - 1]->mDst.mTemp) && CanBypassUp(code, mInstructions[ti - 1])) + ti--; + } + } + + mInstructions.Insert(ti, code); } const InterInstruction* InterCodeBasicBlock::FindByDst(int dst) const @@ -14867,7 +14878,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar ains->mSrc[1] = lins->mDst; ains->mSrc[0] = lins->mSrc[0]; ains->mSrc[0].mIntConst *= indexScale[lins->mSrc[1].mTemp]; - tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + tail->AppendBeforeBranch(ains); indexScale[ains->mDst.mTemp] = (int)ains->mSrc[0].mIntConst; @@ -14896,7 +14907,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar ains->mSrc[0].mType = lins->mDst.mType; ains->mSrc[0].mTemp = -1; ains->mSrc[0].mIntConst = s; - tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + tail->AppendBeforeBranch(ains); indexScale[ains->mDst.mTemp] = s; @@ -14928,7 +14939,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar ains->mSrc[1] = nins->mDst; ains->mSrc[0] = nins->mSrc[0]; ains->mSrc[0].mIntConst *= indexScale[lins->mSrc[0].mTemp]; - tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + tail->AppendBeforeBranch(ains); ains->mDst.mRange.mMaxValue += ains->mSrc[0].mIntConst; indexScale[ains->mDst.mTemp] = (int)ains->mSrc[0].mIntConst; @@ -14963,7 +14974,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar ains->mSrc[0].mType = lins->mDst.mType; ains->mSrc[0].mTemp = -1; ains->mSrc[0].mIntConst = s; - tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + tail->AppendBeforeBranch(ains); ains->mDst.mRange.mMaxValue += ains->mSrc[0].mIntConst; indexScale[ains->mDst.mTemp] = s; @@ -14975,7 +14986,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar } else if (lins->mCode == IC_LEA) 
{ - if (lins->mSrc[0].mTemp >= 0 && lins->mSrc[0].IsNotUByte() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body) && IsLoopInvariantTemp(lins->mSrc[1].mTemp, body)) + if (lins->mSrc[0].mTemp >= 0 && !lins->mSrc[0].IsUByte() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body) && IsLoopInvariantTemp(lins->mSrc[1].mTemp, body)) { mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins); mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp; @@ -14990,7 +15001,11 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar ains->mSrc[0].mTemp = -1; ains->mSrc[0].mType = IT_INT16; ains->mSrc[0].mIntConst = indexScale[lins->mSrc[0].mTemp]; - tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + + if (IsTempModifiedInRange(0, i, lins->mSrc[0].mTemp)) + mInstructions.Insert(i, ains); + else + tail->AppendBeforeBranch(ains); modified = true; continue; @@ -21746,7 +21761,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "copy_left"); + CheckFunc = !strcmp(mIdent->mString, "main"); CheckCase = false; mEntryBlock = mBlocks[0]; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index afde6e3..d146139 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -36780,7 +36780,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { int addr = mIns[ai].mAddress; - bool fail = false, changey = false; + bool fail = false, changey = false, changev = false; for (int i = 0; i < mIns.Size(); i++) { @@ -36794,7 +36794,10 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc else if (mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) { if (mIns[i].mType == ASMIT_STA || mIns[i].mType == ASMIT_INC || mIns[i].mType == ASMIT_DEC) + { changey = true; + changev = true; + } 
else if (mIns[i].mType != ASMIT_LDA) { fail = true; @@ -36816,7 +36819,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc exitBlock->mEntryRequiredRegs += CPU_REG_Y; prevBlock->mIns.Push(NativeCodeInstruction(mIns[ai].mIns, ASMIT_LDY, ASMIM_ZERO_PAGE, addr)); - if (changey) + if (changev) exitBlock->mIns.Push(NativeCodeInstruction(mIns[ai].mIns, ASMIT_STY, ASMIM_ZERO_PAGE, addr)); for (int i = 0; i < mIns.Size(); i++) { @@ -50208,7 +50211,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) mInterProc = proc; mInterProc->mLinkerObject->mNativeProc = this; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "copy_left"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "strlen"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -51998,7 +52001,6 @@ void NativeCodeProcedure::Optimize(void) changed = mEntryBlock->JoinTailCodeSequences(this, true); } - } while (changed); #endif