Optimized long multiply

This commit is contained in:
drmortalwombat 2024-02-17 12:06:15 +01:00
parent b84cce7609
commit 6f7efd9efc
5 changed files with 250 additions and 38 deletions

View File

@ -232,115 +232,115 @@ echo Failed with error #%errorlevel%.
exit /b %errorlevel% exit /b %errorlevel%
:testh :testh
..\release\oscar64 -e -bc %~1 ..\bin\oscar64 -e -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -n %~1 ..\bin\oscar64 -e -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -bc %~1 ..\bin\oscar64 -e -O2 -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -n %~1 ..\bin\oscar64 -e -O2 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -n -dHEAPCHECK %~1 ..\bin\oscar64 -e -O2 -n -dHEAPCHECK %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O0 -bc %~1 ..\bin\oscar64 -e -O0 -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O0 -n %~1 ..\bin\oscar64 -e -O0 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -Os -bc %~1 ..\bin\oscar64 -e -Os -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -Os -n %~1 ..\bin\oscar64 -e -Os -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O3 -bc %~1 ..\bin\oscar64 -e -O3 -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O3 -n %~1 ..\bin\oscar64 -e -O3 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O3 -n -dHEAPCHECK %~1 ..\bin\oscar64 -e -O3 -n -dHEAPCHECK %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
@exit /b 0 @exit /b 0
:test :test
..\release\oscar64 -e -bc %~1 ..\bin\oscar64 -e -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -n %~1 ..\bin\oscar64 -e -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -bc %~1 ..\bin\oscar64 -e -O2 -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -n %~1 ..\bin\oscar64 -e -O2 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O0 -bc %~1 ..\bin\oscar64 -e -O0 -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O0 -n %~1 ..\bin\oscar64 -e -O0 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -Os -bc %~1 ..\bin\oscar64 -e -Os -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -Os -n %~1 ..\bin\oscar64 -e -Os -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O3 -bc %~1 ..\bin\oscar64 -e -O3 -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O3 -n %~1 ..\bin\oscar64 -e -O3 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -xz -Oz -n %~1 ..\bin\oscar64 -e -O2 -xz -Oz -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
@exit /b 0 @exit /b 0
:testb :testb
..\release\oscar64 -e -bc %~1 ..\bin\oscar64 -e -bc %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -bc -O2 %~1 ..\bin\oscar64 -e -bc -O2 %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -bc -O0 %~1 ..\bin\oscar64 -e -bc -O0 %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -bc -Os %~1 ..\bin\oscar64 -e -bc -Os %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -bc -O3 %~1 ..\bin\oscar64 -e -bc -O3 %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
@exit /b 0 @exit /b 0
:testn :testn
..\release\oscar64 -e -n %~1 ..\bin\oscar64 -e -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -n %~1 ..\bin\oscar64 -e -O2 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O0 -n %~1 ..\bin\oscar64 -e -O0 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -Os -n %~1 ..\bin\oscar64 -e -Os -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O3 -n %~1 ..\bin\oscar64 -e -O3 -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
..\release\oscar64 -e -O2 -xz -Oz -n %~1 ..\bin\oscar64 -e -O2 -xz -Oz -n %~1
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
@exit /b 0 @exit /b 0

View File

@ -5,6 +5,11 @@ void testmuli(long a, long b, long ab)
assert (a * b == ab); assert (a * b == ab);
} }
// Checks that the unsigned long product of a and b equals the expected value ab.
void testmulu(unsigned long a, unsigned long b, unsigned long ab)
{
	// Compute the product once, in the operand type, then compare.
	const unsigned long product = a * b;
	assert (product == ab);
}
void testdivi(long a, long b, long ab) void testdivi(long a, long b, long ab)
{ {
assert (a / b == ab); assert (a / b == ab);
@ -80,6 +85,26 @@ int main(void)
testmuli( -1024, 1237, -1266688l); testmuli( -1024, 1237, -1266688l);
testmuli( -1024,-1237, 1266688l); testmuli( -1024,-1237, 1266688l);
testmulu(0x00000001, 0x0000003c, 0x0000003c);
testmulu(0x00000100, 0x0000003c, 0x00003c00);
testmulu(0x00010000, 0x0000003c, 0x003c0000);
testmulu(0x01000000, 0x0000003c, 0x3c000000);
testmulu(0x0000003c, 0x00000001, 0x0000003c);
testmulu(0x0000003c, 0x00000100, 0x00003c00);
testmulu(0x0000003c, 0x00010000, 0x003c0000);
testmulu(0x0000003c, 0x01000000, 0x3c000000);
testmulu(0x0000004b, 0x0000003c, 0x00001194);
testmulu(0x00004b00, 0x0000003c, 0x00119400);
testmulu(0x004b0000, 0x0000003c, 0x11940000);
testmulu(0x4b000000, 0x0000003c, 0x94000000);
testmulu(0x0000003c, 0x0000004b, 0x00001194);
testmulu(0x0000003c, 0x00004b00, 0x00119400);
testmulu(0x0000003c, 0x004b0000, 0x11940000);
testmulu(0x0000003c, 0x4b000000, 0x94000000);
testdivi( 1, 1, 1); testdivi( 1, 1, 1);
testdivi(-1, 1, -1); testdivi(-1, 1, -1);
testdivi( 1, -1, -1); testdivi( 1, -1, -1);

View File

@ -695,6 +695,11 @@ L4:
sty tmp + 2 sty tmp + 2
} }
// mul32by8: declared here with an empty body (no instructions between the braces).
// NOTE(review): looks like a placeholder for a 32 bit by 8 bit multiply - confirm intent.
__asm mul32by8
{
}
__asm mul32 __asm mul32
{ {
lda #0 lda #0
@ -703,6 +708,52 @@ __asm mul32
sta tmp + 6 sta tmp + 6
sta tmp + 7 sta tmp + 7
lda tmp + 0
jsr WM
lda tmp + 1
jsr WM
lda tmp + 2
jsr WM
lda tmp + 3
WM:
bne W0
ldx accu + 2
stx accu + 3
ldx accu + 1
stx accu + 2
ldx accu
stx accu + 1
sta accu
rts
W0:
sec
ror
bcc W1
L1: tax
clc
lda tmp + 4
adc accu
sta tmp + 4
lda tmp + 5
adc accu + 1
sta tmp + 5
lda tmp + 6
adc accu + 2
sta tmp + 6
lda tmp + 7
adc accu + 3
sta tmp + 7
txa
W1: asl accu
rol accu + 1
rol accu + 2
rol accu + 3
lsr
bcc W1
bne L1
rts
#if 0
ldx #32 ldx #32
L1: lsr tmp + 3 L1: lsr tmp + 3
ror tmp + 2 ror tmp + 2
@ -729,6 +780,7 @@ W1: asl accu
dex dex
bne L1 bne L1
rts rts
#endif
} }
__asm mul16by8 __asm mul16by8

View File

@ -967,8 +967,11 @@ static int64 ConstantFolding(InterOperator oper, InterType type, int64 val1, int
return val1 - val2; return val1 - val2;
break; break;
case IA_MUL: case IA_MUL:
if (type == IT_INT32 && val1 >= 0 && val2 >= 0)
return val1 * val2 & 0xffffffff;
else
return val1 * val2; return val1 * val2;
break;
case IA_DIVU: case IA_DIVU:
if (val2) if (val2)
return (uint64)val1 / (uint64)val2; return (uint64)val1 / (uint64)val2;
@ -11042,6 +11045,123 @@ void InterCodeBasicBlock::LinkerObjectForwarding(const GrowingInstructionPtrArra
} }
} }
// Walks the flow graph from this block and, after every call whose target linker
// object is the procedure's own linker object, inserts clones of the tracked
// loads from paramMemory whose destination temp is still marked in the
// required-temps set.
//
// paramMemory - memory class whose non-indexed loads (mSrc[0].mTemp < 0) are tracked
// tvalue      - loads known to be valid on the incoming edge from the predecessor
//
// mLoadStoreInstructions is used as the per-block working set of tracked loads.
// NOTE(review): presumably the purpose is to reload parameters after a recursive
// call instead of keeping their temps alive across it - confirm against caller.
void InterCodeBasicBlock::ReduceRecursionTempSpilling(InterMemory paramMemory, const GrowingInstructionPtrArray& tvalue)
{
if (!mVisited)
{
if (!mLoopHead)
{
if (mNumEntries > 0)
{
// First incoming edge: take the predecessor's set as is.
if (mNumEntered == 0)
mLoadStoreInstructions = tvalue;
else
{
// Later incoming edges: keep only the instructions that are also
// present in tvalue (intersection by pointer identity).
int i = 0;
while (i < mLoadStoreInstructions.Size())
{
InterInstruction* ins(mLoadStoreInstructions[i]);
InterInstruction* nins = nullptr;
int j = tvalue.IndexOf(ins);
if (j != -1)
nins = ins;
if (nins)
mLoadStoreInstructions[i++] = nins;
else
mLoadStoreInstructions.Remove(i);
}
}
// Defer processing of this block until every entry edge has contributed.
mNumEntered++;
if (mNumEntered < mNumEntries)
return;
}
}
#if 1
// Loop head with two entries where one of the jumps targets this block itself:
// start from the incoming set and drop every tracked load that references a
// temp written inside the block or collides in memory with such an instruction.
else if (mNumEntries == 2 && (mTrueJump == this || mFalseJump == this))
{
mLoadStoreInstructions = tvalue;
for (int i = 0; i < mInstructions.Size(); i++)
{
InterInstruction* ins(mInstructions[i]);
if (ins->mDst.mTemp >= 0)
{
int j = 0;
while (j < mLoadStoreInstructions.Size())
{
if (mLoadStoreInstructions[j]->ReferencesTemp(ins->mDst.mTemp) || CollidingMem(ins, mLoadStoreInstructions[j]))
mLoadStoreInstructions.Remove(j);
else
j++;
}
}
}
}
#endif
// Any other loop head: start with an empty set.
else
mLoadStoreInstructions.SetSize(0);
mVisited = true;
// Working copy of the temps required on entry; entries are only removed
// below, when an operand is flagged as final.
NumberSet rtemps(mEntryRequiredTemps);
for (int i = 0; i < mInstructions.Size(); i++)
{
InterInstruction* ins(mInstructions[i]);
InterInstruction* lins = nullptr;
// NOTE(review): flushMem is never read in this function.
bool flushMem = false;
if (ins->mCode == IC_CALL || ins->mCode == IC_CALL_NATIVE)
{
// Call to this procedure's own linker object.
if (ins->mSrc[0].mLinkerObject == mProc->mLinkerObject)
{
// Insert a clone of each tracked load whose destination temp is still in
// rtemps directly after the call. The clones land at index i + 1, so the
// following loop iterations visit them as well.
for (int j = 0; j < mLoadStoreInstructions.Size(); j++)
{
if (rtemps[mLoadStoreInstructions[j]->mDst.mTemp])
mInstructions.Insert(i + 1, mLoadStoreInstructions[j]->Clone());
}
}
}
else if (ins->mCode == IC_LOAD && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mMemory == paramMemory)
{
// Only remember loads whose destination type size equals the operand size.
if (InterTypeSize[ins->mDst.mType] == ins->mSrc[0].mOperandSize)
lins = ins;
}
// Operands flagged as final remove their temp from rtemps.
for (int j = 0; j < ins->mNumOperands; j++)
{
if (ins->mSrc[j].mTemp >= 0 && ins->mSrc[j].mFinal)
rtemps -= ins->mSrc[j].mTemp;
}
int j = 0, k = 0, t = ins->mDst.mTemp;
if (t >= 0 || IsObservable(ins->mCode))
{
// Compact the tracked set in place: drop entries for which DestroyingMem
// reports a conflict with this instruction, and entries whose destination
// temp is the temp written here.
while (j < mLoadStoreInstructions.Size())
{
if (DestroyingMem(mLoadStoreInstructions[j], ins))
;
else if (t != mLoadStoreInstructions[j]->mDst.mTemp)
mLoadStoreInstructions[k++] = mLoadStoreInstructions[j];
j++;
}
mLoadStoreInstructions.SetSize(k);
}
// Register the new load only after the filtering above, so it is not
// removed by its own destination temp.
if (lins)
mLoadStoreInstructions.Push(lins);
}
// Propagate the set valid at the end of this block to both successors.
if (mTrueJump) mTrueJump->ReduceRecursionTempSpilling(paramMemory, mLoadStoreInstructions);
if (mFalseJump) mFalseJump->ReduceRecursionTempSpilling(paramMemory, mLoadStoreInstructions);
}
}
bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& tvalue, const GrowingVariableArray& staticVars) bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& tvalue, const GrowingVariableArray& staticVars)
{ {
bool changed = false; bool changed = false;
@ -19820,6 +19940,15 @@ void InterCodeProcedure::EliminateAliasValues()
DisassembleDebug("EliminateAliasValues"); DisassembleDebug("EliminateAliasValues");
} }
// Procedure-level driver: clears the visited flags, runs the block-level
// ReduceRecursionTempSpilling pass from the entry block with an empty set of
// known loads, then emits a debug disassembly of the result.
void InterCodeProcedure::ReduceRecursionTempSpilling(InterMemory paramMemory)
{
	// Nothing is known on entry to the procedure, so the seed set is empty.
	GrowingInstructionPtrArray	noKnownLoads(nullptr);

	ResetVisited();
	mEntryBlock->ReduceRecursionTempSpilling(paramMemory, noKnownLoads);

	DisassembleDebug("ReduceRecursionTempSpilling");
}
void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory) void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory)
{ {
DisassembleDebug("Load/Store forwardingY"); DisassembleDebug("Load/Store forwardingY");
@ -19931,7 +20060,7 @@ void InterCodeProcedure::Close(void)
{ {
GrowingTypeArray tstack(IT_NONE); GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "KeyExpansion"); CheckFunc = !strcmp(mIdent->mString, "main");
CheckCase = false; CheckCase = false;
mEntryBlock = mBlocks[0]; mEntryBlock = mBlocks[0];
@ -20822,6 +20951,9 @@ void InterCodeProcedure::Close(void)
DisassembleDebug("Reduced Temporaries"); DisassembleDebug("Reduced Temporaries");
if (!mFastCallProcedure)
ReduceRecursionTempSpilling(paramMemory);
// Optimize for size // Optimize for size
MergeBasicBlocks(); MergeBasicBlocks();

View File

@ -472,6 +472,7 @@ public:
void LinkerObjectForwarding(const GrowingInstructionPtrArray& tvalue); void LinkerObjectForwarding(const GrowingInstructionPtrArray& tvalue);
bool LoadStoreForwarding(const GrowingInstructionPtrArray& tvalue, const GrowingVariableArray& staticVars); bool LoadStoreForwarding(const GrowingInstructionPtrArray& tvalue, const GrowingVariableArray& staticVars);
void ReduceRecursionTempSpilling(InterMemory paramMemory, const GrowingInstructionPtrArray& tvalue);
void LocalRenameRegister(const GrowingIntArray& renameTable, int& num); void LocalRenameRegister(const GrowingIntArray& renameTable, int& num);
void BuildGlobalRenameRegisterTable(const GrowingIntArray& renameTable, GrowingIntArray& globalRenameTable); void BuildGlobalRenameRegisterTable(const GrowingIntArray& renameTable, GrowingIntArray& globalRenameTable);
@ -715,6 +716,7 @@ protected:
void MergeIndexedLoadStore(void); void MergeIndexedLoadStore(void);
void EliminateAliasValues(); void EliminateAliasValues();
void LoadStoreForwarding(InterMemory paramMemory); void LoadStoreForwarding(InterMemory paramMemory);
void ReduceRecursionTempSpilling(InterMemory paramMemory);
void ExpandSelect(void); void ExpandSelect(void);
void PropagateConstOperationsUp(void); void PropagateConstOperationsUp(void);
void RebuildIntegerRangeSet(void); void RebuildIntegerRangeSet(void);
@ -731,6 +733,7 @@ protected:
void PropagateMemoryAliasingInfo(void); void PropagateMemoryAliasingInfo(void);
void MoveConditionsOutOfLoop(void); void MoveConditionsOutOfLoop(void);
void PeepholeOptimization(void); void PeepholeOptimization(void);
void CheckFinal(void); void CheckFinal(void);