From 6f7efd9efc90844f917785f3e45cdef2e300b42f Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 17 Feb 2024 12:06:15 +0100 Subject: [PATCH] Optimized long multiply --- autotest/autotest.bat | 68 ++++++++++----------- autotest/testint32.c | 25 ++++++++ include/crt.c | 52 ++++++++++++++++ oscar64/InterCode.cpp | 138 +++++++++++++++++++++++++++++++++++++++++- oscar64/InterCode.h | 5 +- 5 files changed, 250 insertions(+), 38 deletions(-) diff --git a/autotest/autotest.bat b/autotest/autotest.bat index bef2698..3de8c63 100644 --- a/autotest/autotest.bat +++ b/autotest/autotest.bat @@ -232,115 +232,115 @@ echo Failed with error #%errorlevel%. exit /b %errorlevel% :testh -..\release\oscar64 -e -bc %~1 +..\bin\oscar64 -e -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -n %~1 +..\bin\oscar64 -e -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -bc %~1 +..\bin\oscar64 -e -O2 -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -n %~1 +..\bin\oscar64 -e -O2 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -n -dHEAPCHECK %~1 +..\bin\oscar64 -e -O2 -n -dHEAPCHECK %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O0 -bc %~1 +..\bin\oscar64 -e -O0 -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O0 -n %~1 +..\bin\oscar64 -e -O0 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -Os -bc %~1 +..\bin\oscar64 -e -Os -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -Os -n %~1 +..\bin\oscar64 -e -Os -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O3 -bc %~1 +..\bin\oscar64 -e -O3 -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O3 -n %~1 +..\bin\oscar64 -e -O3 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O3 -n -dHEAPCHECK %~1 +..\bin\oscar64 -e -O3 -n -dHEAPCHECK %~1 @if %errorlevel% neq 0 goto :error @exit /b 0 :test -..\release\oscar64 -e -bc %~1 +..\bin\oscar64 -e -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -n %~1 +..\bin\oscar64 -e -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -bc %~1 +..\bin\oscar64 -e -O2 -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -n %~1 +..\bin\oscar64 -e -O2 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O0 -bc %~1 +..\bin\oscar64 -e -O0 -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O0 -n %~1 +..\bin\oscar64 -e -O0 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -Os -bc %~1 +..\bin\oscar64 -e -Os -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -Os -n %~1 +..\bin\oscar64 -e -Os -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O3 -bc %~1 +..\bin\oscar64 -e -O3 -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O3 -n %~1 +..\bin\oscar64 -e -O3 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -xz -Oz -n %~1 +..\bin\oscar64 -e -O2 -xz -Oz -n %~1 @if %errorlevel% neq 0 goto :error @exit /b 0 :testb -..\release\oscar64 -e -bc %~1 +..\bin\oscar64 -e -bc %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -bc -O2 %~1 +..\bin\oscar64 -e -bc -O2 %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -bc -O0 %~1 +..\bin\oscar64 -e -bc -O0 %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -bc -Os %~1 +..\bin\oscar64 -e -bc -Os %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -bc -O3 %~1 +..\bin\oscar64 -e -bc -O3 %~1 @if %errorlevel% neq 0 goto :error @exit /b 0 :testn -..\release\oscar64 -e -n %~1 +..\bin\oscar64 -e -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -n %~1 +..\bin\oscar64 -e -O2 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O0 -n %~1 +..\bin\oscar64 -e -O0 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -Os -n %~1 +..\bin\oscar64 -e -Os -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O3 -n %~1 +..\bin\oscar64 -e -O3 -n %~1 @if %errorlevel% neq 0 goto :error -..\release\oscar64 -e -O2 -xz -Oz -n %~1 +..\bin\oscar64 -e -O2 -xz -Oz -n %~1 @if %errorlevel% neq 0 goto :error @exit /b 0 diff --git a/autotest/testint32.c b/autotest/testint32.c index b82fa8c..01f27e3 100644 --- a/autotest/testint32.c +++ b/autotest/testint32.c @@ -5,6 +5,11 @@ void testmuli(long a, long b, long ab) assert (a * b == ab); } +void testmulu(unsigned long a, unsigned long b, unsigned long ab) +{ + assert (a * b == ab); +} + void testdivi(long a, long b, long ab) { assert (a / b == ab); @@ -80,6 +85,26 @@ int main(void) testmuli( -1024, 1237, -1266688l); testmuli( -1024,-1237, 1266688l); + testmulu(0x00000001, 0x0000003c, 0x0000003c); + testmulu(0x00000100, 0x0000003c, 0x00003c00); + testmulu(0x00010000, 0x0000003c, 0x003c0000); + testmulu(0x01000000, 0x0000003c, 0x3c000000); + + testmulu(0x0000003c, 0x00000001, 0x0000003c); + testmulu(0x0000003c, 0x00000100, 0x00003c00); + testmulu(0x0000003c, 0x00010000, 0x003c0000); + testmulu(0x0000003c, 0x01000000, 0x3c000000); + + testmulu(0x0000004b, 0x0000003c, 0x00001194); + testmulu(0x00004b00, 0x0000003c, 0x00119400); + testmulu(0x004b0000, 0x0000003c, 0x11940000); + testmulu(0x4b000000, 0x0000003c, 0x94000000); + + testmulu(0x0000003c, 0x0000004b, 0x00001194); + testmulu(0x0000003c, 0x00004b00, 0x00119400); + testmulu(0x0000003c, 0x004b0000, 0x11940000); + testmulu(0x0000003c, 0x4b000000, 0x94000000); + testdivi( 1, 1, 1); testdivi(-1, 1, -1); testdivi( 1, -1, -1); diff --git a/include/crt.c b/include/crt.c index 3989ad2..ad70de8 100644 --- a/include/crt.c +++ b/include/crt.c @@ -695,6 +695,11 @@ L4: sty tmp + 2 } +__asm mul32by8 +{ + +} + __asm mul32 { lda #0 @@ -703,6 +708,52 @@ __asm mul32 sta tmp + 6 sta tmp + 7 + lda tmp + 0 + jsr WM + lda tmp + 1 + jsr WM + lda tmp + 2 + jsr WM + lda tmp + 3 +WM: + bne W0 + ldx accu + 2 + stx accu + 3 + ldx accu + 1 + stx accu + 2 + ldx accu + stx accu + 1 + sta accu + rts +W0: + sec + ror + bcc W1 +L1: tax + clc + lda tmp + 4 + adc accu + sta tmp + 4 + lda tmp + 5 + adc accu + 1 + sta tmp + 5 + lda tmp + 6 + adc accu + 2 + sta tmp + 6 + lda tmp + 7 + adc accu + 3 + sta tmp + 7 + txa +W1: asl accu + rol accu + 1 + rol accu + 2 + rol accu + 3 + lsr + bcc W1 + bne L1 + rts + +#if 0 ldx #32 L1: lsr tmp + 3 ror tmp + 2 @@ -729,6 +780,7 @@ W1: asl accu dex bne L1 rts +#endif } __asm mul16by8 diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 73a435f..1292b32 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -967,8 +967,11 @@ static int64 ConstantFolding(InterOperator oper, InterType type, int64 val1, int return val1 - val2; break; case IA_MUL: - return val1 * val2; - break; + if (type == IT_INT32 && val1 >= 0 && val2 >= 0) + return val1 * val2 & 0xffffffff; + else + return val1 * val2; + case IA_DIVU: if (val2) return (uint64)val1 / (uint64)val2; @@ -11042,6 +11045,123 @@ void InterCodeBasicBlock::LinkerObjectForwarding(const GrowingInstructionPtrArra } } +void InterCodeBasicBlock::ReduceRecursionTempSpilling(InterMemory paramMemory, const GrowingInstructionPtrArray& tvalue) +{ + if (!mVisited) + { + if (!mLoopHead) + { + if (mNumEntries > 0) + { + if (mNumEntered == 0) + mLoadStoreInstructions = tvalue; + else + { + int i = 0; + while (i < mLoadStoreInstructions.Size()) + { + InterInstruction* ins(mLoadStoreInstructions[i]); + InterInstruction* nins = nullptr; + + int j = tvalue.IndexOf(ins); + if (j != -1) + nins = ins; + + if (nins) + mLoadStoreInstructions[i++] = nins; + else + mLoadStoreInstructions.Remove(i); + } + } + + mNumEntered++; + + if (mNumEntered < mNumEntries) + return; + } + } +#if 1 + else if (mNumEntries == 2 && (mTrueJump == this || mFalseJump == this)) + { + mLoadStoreInstructions = tvalue; + for (int i = 0; i < mInstructions.Size(); i++) + { + InterInstruction* ins(mInstructions[i]); + if (ins->mDst.mTemp >= 0) + { + int j = 0; + while (j < mLoadStoreInstructions.Size()) + { + if (mLoadStoreInstructions[j]->ReferencesTemp(ins->mDst.mTemp) || CollidingMem(ins, mLoadStoreInstructions[j])) + mLoadStoreInstructions.Remove(j); + else + j++; + } + } + } + } +#endif + else + mLoadStoreInstructions.SetSize(0); + + mVisited = true; + + NumberSet rtemps(mEntryRequiredTemps); + + for (int i = 0; i < mInstructions.Size(); i++) + { + InterInstruction* ins(mInstructions[i]); + InterInstruction* lins = nullptr; + bool flushMem = false; + + if (ins->mCode == IC_CALL || ins->mCode == IC_CALL_NATIVE) + { + if (ins->mSrc[0].mLinkerObject == mProc->mLinkerObject) + { + for (int j = 0; j < mLoadStoreInstructions.Size(); j++) + { + if (rtemps[mLoadStoreInstructions[j]->mDst.mTemp]) + mInstructions.Insert(i + 1, mLoadStoreInstructions[j]->Clone()); + } + } + } + else if (ins->mCode == IC_LOAD && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mMemory == paramMemory) + { + if (InterTypeSize[ins->mDst.mType] == ins->mSrc[0].mOperandSize) + lins = ins; + } + + for (int j = 0; j < ins->mNumOperands; j++) + { + if (ins->mSrc[j].mTemp >= 0 && ins->mSrc[j].mFinal) + rtemps -= ins->mSrc[j].mTemp; + } + + int j = 0, k = 0, t = ins->mDst.mTemp; + if (t >= 0 || IsObservable(ins->mCode)) + { + while (j < mLoadStoreInstructions.Size()) + { + if (DestroyingMem(mLoadStoreInstructions[j], ins)) + ; + else if (t != mLoadStoreInstructions[j]->mDst.mTemp) + mLoadStoreInstructions[k++] = mLoadStoreInstructions[j]; + + j++; + } + mLoadStoreInstructions.SetSize(k); + } + + if (lins) + mLoadStoreInstructions.Push(lins); + } + + if (mTrueJump) mTrueJump->ReduceRecursionTempSpilling(paramMemory, mLoadStoreInstructions); + if (mFalseJump) mFalseJump->ReduceRecursionTempSpilling(paramMemory, mLoadStoreInstructions); + } +} + + bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& tvalue, const GrowingVariableArray& staticVars) { bool changed = false; @@ -19820,6 +19940,15 @@ void InterCodeProcedure::EliminateAliasValues() DisassembleDebug("EliminateAliasValues"); } +void InterCodeProcedure::ReduceRecursionTempSpilling(InterMemory paramMemory) +{ + GrowingInstructionPtrArray gipa(nullptr); + ResetVisited(); + mEntryBlock->ReduceRecursionTempSpilling(paramMemory, gipa); + + DisassembleDebug("ReduceRecursionTempSpilling"); +} + void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory) { DisassembleDebug("Load/Store forwardingY"); @@ -19931,7 +20060,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "KeyExpansion"); + CheckFunc = !strcmp(mIdent->mString, "main"); CheckCase = false; mEntryBlock = mBlocks[0]; @@ -20822,6 +20951,9 @@ void InterCodeProcedure::Close(void) DisassembleDebug("Reduced Temporaries"); + if (!mFastCallProcedure) + ReduceRecursionTempSpilling(paramMemory); + // Optimize for size MergeBasicBlocks(); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 5e21024..2e7b336 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -472,6 +472,7 @@ public: void LinkerObjectForwarding(const GrowingInstructionPtrArray& tvalue); bool LoadStoreForwarding(const GrowingInstructionPtrArray& tvalue, const GrowingVariableArray& staticVars); + void ReduceRecursionTempSpilling(InterMemory paramMemory, const GrowingInstructionPtrArray& tvalue); void LocalRenameRegister(const GrowingIntArray& renameTable, int& num); void BuildGlobalRenameRegisterTable(const GrowingIntArray& renameTable, GrowingIntArray& globalRenameTable); @@ -620,7 +621,7 @@ public: void CheckNullptrDereference(void); void CollectGlobalReferences(NumberSet& referencedGlobals, NumberSet& modifiedGlobals, bool & storesIndirect, bool & loadsIndirect, bool & globalsChecked); - + }; class InterCodeProcedure @@ -715,6 +716,7 @@ protected: void MergeIndexedLoadStore(void); void EliminateAliasValues(); void LoadStoreForwarding(InterMemory paramMemory); + void ReduceRecursionTempSpilling(InterMemory paramMemory); void ExpandSelect(void); void PropagateConstOperationsUp(void); void RebuildIntegerRangeSet(void); @@ -731,6 +733,7 @@ protected: void PropagateMemoryAliasingInfo(void); void MoveConditionsOutOfLoop(void); + void PeepholeOptimization(void); void CheckFinal(void);