Improve fix point arithmetic

This commit is contained in:
drmortalwombat 2024-07-28 14:41:17 +02:00
parent 7ecd6adb20
commit 5bb8ca0b89
7 changed files with 495 additions and 90 deletions

View File

@ -183,6 +183,9 @@ rem @echo off
@call :test divmod32test.c
@if %errorlevel% neq 0 goto :error
@call :test fixmathtest.c
@if %errorlevel% neq 0 goto :error
@call :test enumswitch.c
@if %errorlevel% neq 0 goto :error

90
autotest/fixmathtest.c Normal file
View File

@ -0,0 +1,90 @@
#include <fixmath.h>
#include <assert.h>
#include <stdlib.h>
// Unsigned boundary values for the 16 bit mul/div tests, written as bit
// patterns: single bits, byte limits and the 16 bit limits.
unsigned tval[] = {
	0x0001, 0x0002, 0x0010, 0x0080,
	0x00ff, 0x0100, 0x1000, 0x8000,
	0xffff
};
// Checks lmuldiv16u(a, b, c) against a * b / c, first on the fixed boundary
// values in tval and then on random operands.
void testmuldiv16u(void)
{
	// Fixed values: x * 0 / x == 0 and x * y / x == y in both argument orders
	for (char p=0; p<9; p++)
	{
		unsigned	u = tval[p];

		assert(lmuldiv16u(u, 0, u) == 0);
		assert(lmuldiv16u(0, u, u) == 0);

		for(char q=0; q<9; q++)
		{
			unsigned	v = tval[q];

			assert(lmuldiv16u(u, v, u) == v);
			assert(lmuldiv16u(v, u, u) == v);
		}
	}

	// Random values: compare with a wide reference computation, but only
	// when the divisor is not zero and the quotient fits into 16 bits
	for(int iter=0; iter<10000; iter++)
	{
		unsigned a = rand();
		unsigned b = rand();
		unsigned c = rand();

		if (c != 0)
		{
			unsigned long expected = (unsigned long)a * (unsigned long) b / c;

			if (expected < 0x10000l)
				assert(lmuldiv16u(a, b, c) == expected);
		}
	}
}
// Signed boundary values for the 16 bit signed mul/div test, each magnitude
// once positive and once negative.  The element type is int because the
// table holds negative values and feeds the int parameters of lmuldiv16s.
int ival[] = {
	1, 2, 16, 128, 255, 256, 4096, 32767,
	-1, -2, -16, -128, -255, -256, -4096, -32767
};
// Checks lmuldiv16s(a, b, c) against a * b / c, first on the fixed boundary
// values in ival and then on random operands.
void testmuldiv16s(void)
{
	// Fixed values: x * 0 / x == 0 and x * y / x == y in both argument orders
	for (char p=0; p<16; p++)
	{
		assert(lmuldiv16s(ival[p], 0, ival[p]) == 0);
		assert(lmuldiv16s(0, ival[p], ival[p]) == 0);

		for(char q=0; q<16; q++)
		{
			assert(lmuldiv16s(ival[p], ival[q], ival[p]) == ival[q]);
			assert(lmuldiv16s(ival[q], ival[p], ival[p]) == ival[q]);
		}
	}

	// Random values: only positive divisors are tested, and only when the
	// reference quotient fits into the signed 16 bit range
	for(int iter=0; iter<10000; iter++)
	{
		int a = rand();
		int b = rand();
		int c = rand();

		if (0 < c)
		{
			long expected = (long)a * (long)b / c;

			if (-32768 <= expected && expected <= 32767)
				assert(lmuldiv16s(a, b, c) == expected);
		}
	}
}
// Checks lmul4f12s(x, y) against the product of two random operands shifted
// right by twelve bits, whenever that reference result fits into the signed
// 16 bit range.
void testlmul4f12s(void)
{
	for(int iter=0; iter<20000; iter++)
	{
		int x = rand();
		int y = rand();

		long expected = ((long)x * (long)y) >> 12;

		if (-32768 <= expected && expected <= 32767)
			assert(lmul4f12s(x, y) == expected);
	}
}
// Test driver: runs the three fixmath test groups and returns 0.
int main(void)
{
// All three groups draw their operands from rand(), so the values each
// group sees depend on the order of these calls.
testlmul4f12s();
testmuldiv16u();
testmuldiv16s();
return 0;
}

View File

@ -116,31 +116,37 @@ int lmul4f12s(int x, int y)
{
__asm
{
bit y + 1
bpl W0
sec
lda #0
sbc y
sta y
lda #0
sbc y + 1
sta y + 1
lda x
ror
sta accu
sec
lda #0
sbc x
sta x
lda #0
sbc x + 1
sta x + 1
W0:
ldx #15
lda #0
sta accu + 1
L2:
bcc W4
tay
clc
lda accu + 1
adc y
sta accu + 1
tya
adc y + 1
W4:
ror
ror accu + 1
lsr accu
bcc W4
bne L2
L1: lsr x + 1
ror x
ldx x + 1
stx accu
ldx #7
lsr accu
L1:
bcc W1
tay
clc
@ -156,7 +162,6 @@ W1:
dex
bne L1
lsr x
bcc W2
tay
@ -166,13 +171,22 @@ W1:
sta accu + 1
tya
sbc y + 1
sec
W2:
ror
ror accu + 1
ror accu
bit y + 1
bpl W3
tax
sec
lda accu + 1
sbc x
sta accu + 1
txa
sbc x + 1
W3:
lsr
ror accu + 1
ror accu
@ -270,15 +284,17 @@ unsigned lmuldiv16u(unsigned a, unsigned b, unsigned c)
__asm
{
lda #0
sta __tmp + 0
sta __tmp + 1
sta __tmp + 2
sta __tmp + 3
ldx #16
L1: lsr a + 1
ror a
lda a
sec
T1:
ldy #8
L1:
ror
bcc W1
tax
clc
lda __tmp + 2
adc b
@ -286,20 +302,38 @@ unsigned lmuldiv16u(unsigned a, unsigned b, unsigned c)
lda __tmp + 3
adc b + 1
sta __tmp + 3
txa
W1:
ror __tmp + 3
ror __tmp + 2
ror __tmp + 1
ror __tmp
dex
dey
bne L1
ror
bcc T2
lda #0
sta accu
sta accu + 1
sta __tmp + 0
lda a + 1
clc
bcc T1
ldx #17
T2:
sec
L3:
sta __tmp + 1
ldx #8
L2:
rol __tmp + 1
rol __tmp + 2
rol __tmp + 3
bcc W3
lda __tmp + 2
sbc c
tay
lda __tmp + 3
sbc c + 1
sec
bcs W4
W3:
sec
lda __tmp + 2
sbc c
@ -307,33 +341,23 @@ unsigned lmuldiv16u(unsigned a, unsigned b, unsigned c)
lda __tmp + 3
sbc c + 1
bcc W2
W4:
sta __tmp + 3
sty __tmp + 2
W2:
rol accu
rol accu + 1
asl __tmp
rol __tmp + 1
rol __tmp + 2
rol __tmp + 3
dex
beq E2
bcc L2
lda __tmp + 2
sbc c
sta __tmp + 2
lda __tmp + 3
sbc c + 1
sta __tmp + 3
sec
bcs W2
E2:
bne L2
lda __tmp + 1
rol
bcc T3
sta accu + 1
lda __tmp + 0
clc
bcc L3
T3:
sta accu
}
}
int lmuldiv16s(int a, int b, int c)
@ -358,15 +382,17 @@ int lmuldiv16s(int a, int b, int c)
__asm
{
lda #0
sta __tmp + 0
sta __tmp + 1
sta __tmp + 2
sta __tmp + 3
ldx #16
L1: lsr a + 1
ror a
lda a
sec
T1:
ldy #8
L1:
ror
bcc W1
tax
clc
lda __tmp + 2
adc b
@ -374,20 +400,38 @@ int lmuldiv16s(int a, int b, int c)
lda __tmp + 3
adc b + 1
sta __tmp + 3
txa
W1:
ror __tmp + 3
ror __tmp + 2
ror __tmp + 1
ror __tmp
dex
dey
bne L1
ror
bcc T2
lda #0
sta accu
sta accu + 1
sta __tmp + 0
lda a + 1
clc
bcc T1
ldx #17
T2:
sec
L3:
sta __tmp + 1
ldx #8
L2:
rol __tmp + 1
rol __tmp + 2
rol __tmp + 3
bcc W3
lda __tmp + 2
sbc c
tay
lda __tmp + 3
sbc c + 1
sec
bcs W4
W3:
sec
lda __tmp + 2
sbc c
@ -395,30 +439,23 @@ int lmuldiv16s(int a, int b, int c)
lda __tmp + 3
sbc c + 1
bcc W2
W4:
sta __tmp + 3
sty __tmp + 2
W2:
rol accu
rol accu + 1
asl __tmp
rol __tmp + 1
rol __tmp + 2
rol __tmp + 3
dex
beq E2
bcc L2
bne L2
lda __tmp + 1
rol
bcc T3
sta accu + 1
lda __tmp + 0
clc
bcc L3
T3:
sta accu
lda __tmp + 2
sbc c
sta __tmp + 2
lda __tmp + 3
sbc c + 1
sta __tmp + 3
sec
bcs W2
E2:
lda sign
beq E1

View File

@ -123,6 +123,11 @@ bool IntegerValueRange::IsInvalid(void) const
return mMinState == S_BOUND && mMaxState == S_BOUND && mMinValue > mMaxValue;
}
// A range is bound when both its minimum and its maximum are in the S_BOUND
// state and the interval is not empty (minimum not above maximum).
bool IntegerValueRange::IsBound(void) const
{
	if (mMinState != S_BOUND || mMaxState != S_BOUND)
		return false;

	return mMinValue <= mMaxValue;
}
bool IntegerValueRange::IsConstant(void) const
{
return mMinState == S_BOUND && mMaxState == S_BOUND && mMinValue == mMaxValue;
@ -5539,6 +5544,12 @@ void InterCodeBasicBlock::Append(InterInstruction * code)
this->mInstructions.Push(code);
}
// Inserts the instruction directly in front of the last instruction of this
// block.
// NOTE(review): presumably the last instruction is always the terminating
// jump or branch of the block - confirm; for an empty block the insert
// position would be -1.
void InterCodeBasicBlock::AppendBeforeBranch(InterInstruction* code)
{
	const int	lastIndex = mInstructions.Size() - 1;

	mInstructions.Insert(lastIndex, code);
}
const InterInstruction* InterCodeBasicBlock::FindByDst(int dst) const
{
int n = mInstructions.Size() - 1;
@ -14355,6 +14366,55 @@ InterCodeBasicBlock* InterCodeBasicBlock::CheckIsConstBranch(const GrowingInstru
nins->mConst = tins[k]->mSrc[0];
}
}
else if (ins->mCode == IC_RELATIONAL_OPERATOR && IsIntegerType(ins->mSrc[0].mType))
{
IntegerValueRange v0, v1;
if (ins->mSrc[0].mTemp < 0)
v0.SetLimit(ins->mSrc[0].mIntConst, ins->mSrc[0].mIntConst);
else
{
int k = 0;
while (k < tins.Size() && tins[k]->mDst.mTemp != ins->mSrc[0].mTemp)
k++;
if (k < tins.Size())
v0 = tins[k]->mDst.mRange;
}
if (ins->mSrc[1].mTemp < 0)
v1.SetLimit(ins->mSrc[1].mIntConst, ins->mSrc[1].mIntConst);
else
{
int k = 0;
while (k < tins.Size() && tins[k]->mDst.mTemp != ins->mSrc[1].mTemp)
k++;
if (k < tins.Size())
v1 = tins[k]->mDst.mRange;
}
if (v0.IsBound() && v1.IsBound())
{
if (ins->mOperator == IA_CMPEQ)
{
if (v0.IsConstant() && v1.IsConstant() && v1.mMinValue == v0.mMinValue)
{
nins = new InterInstruction(ins->mLocation, IC_CONSTANT);
nins->mDst = ins->mDst;
nins->mConst.mType = IT_BOOL;
nins->mConst.mIntConst = 1;
}
else if (v0.mMinValue > v1.mMaxValue || v1.mMinValue > v0.mMaxValue)
{
nins = new InterInstruction(ins->mLocation, IC_CONSTANT);
nins->mDst = ins->mDst;
nins->mConst.mType = IT_BOOL;
nins->mConst.mIntConst = 0;
}
}
}
}
else if (ins->mDst.mTemp >= 0 && ins->mDst.mRange.IsBound())
nins = ins;
if (ins->mDst.mTemp >= 0)
{
@ -14425,6 +14485,8 @@ bool InterCodeBasicBlock::ShortcutConstBranches(const GrowingInstructionPtrArray
k++;
}
}
else if (ins->mDst.mTemp >= 0 && ins->mDst.mRange.IsBound())
nins = ins;
if (ins->mCode == IC_STORE && !ins->mVolatile && ins->mSrc[0].mTemp < 0)
nins = ins;
@ -16102,6 +16164,43 @@ bool InterCodeBasicBlock::CheapInlining(int & numTemps)
return changed;
}
// Moves qualifying store instructions out of this block into all of its
// entry blocks and then recurses into the successor blocks.
//
// A store qualifies when its first source is not a temporary, its second
// source is a temporary (presumably the store address - confirm), the
// instruction can be moved before the block, and in every entry block the
// origin of that temporary is an IC_CONSTANT instruction.  Each entry block
// receives its own clone of the store in front of its last instruction.
//
// Relies on mVisited having been reset before the traversal starts.
// Returns true if this block or any block reached from it was changed.
bool InterCodeBasicBlock::PullStoreUpToConstAddress(void)
{
	bool	changed = false;

	if (!mVisited)
	{
		mVisited = true;

		for (int i = 0; i < mInstructions.Size(); i++)
		{
			InterInstruction* ins = mInstructions[i];

			// Without entry blocks there is nowhere to move the store to, so it
			// must stay in place instead of being removed below
			if (ins->mCode == IC_STORE && ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp >= 0 &&
				mEntryBlocks.Size() > 0 && CanMoveInstructionBeforeBlock(i))
			{
				// Count the leading entry blocks in which the temporary originates
				// from a constant; the move is only done if that holds for all
				InterInstruction* cins;
				int j = 0;
				while (j < mEntryBlocks.Size() && (cins = mEntryBlocks[j]->FindTempOrigin(ins->mSrc[1].mTemp)) && cins->mCode == IC_CONSTANT)
					j++;

				if (j == mEntryBlocks.Size())
				{
					for (int k = 0; k < mEntryBlocks.Size(); k++)
						mEntryBlocks[k]->AppendBeforeBranch(ins->Clone());

					changed = true;

					// The next instruction slides into slot i, so revisit this index
					mInstructions.Remove(i);
					i--;
				}
			}
		}

		if (mTrueJump && mTrueJump->PullStoreUpToConstAddress())
			changed = true;
		if (mFalseJump && mFalseJump->PullStoreUpToConstAddress())
			changed = true;
	}

	// Report the accumulated result (was a hard coded false)
	return changed;
}
void InterCodeBasicBlock::RemoveUnusedMallocs(void)
{
if (!mVisited)
@ -21628,7 +21727,7 @@ void InterCodeProcedure::Close(void)
{
GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "main");
CheckFunc = !strcmp(mIdent->mString, "bmu_line");
CheckCase = false;
mEntryBlock = mBlocks[0];
@ -22415,6 +22514,7 @@ void InterCodeProcedure::Close(void)
mEntryBlock->ForwardShortLoadStoreOffsets();
DisassembleDebug("ForwardShortLoadStoreOffsets");
// CollapseDispatch();
// DisassembleDebug("CollapseDispatch");
@ -22507,6 +22607,11 @@ void InterCodeProcedure::Close(void)
}
#endif
BuildDataFlowSets();
ResetVisited();
mEntryBlock->PullStoreUpToConstAddress();
DisassembleDebug("PullStoreUpToConstAddress");
ConstLoopOptimization();
BuildDataFlowSets();

View File

@ -172,6 +172,7 @@ public:
void MergeUnknown(const IntegerValueRange& range);
void SetLimit(int64 minValue, int64 maxValue);
bool IsBound(void) const;
bool IsConstant(void) const;
bool IsInvalid(void) const;
@ -407,6 +408,7 @@ public:
InterCodeBasicBlock* Clone(void);
void Append(InterInstruction * code);
void AppendBeforeBranch(InterInstruction* code);
const InterInstruction* FindByDst(int dst) const;
void Close(InterCodeBasicBlock* trueJump, InterCodeBasicBlock* falseJump);
@ -605,6 +607,8 @@ public:
void PropagateMemoryAliasingInfo(const GrowingInstructionPtrArray& tvalue);
void RemoveUnusedMallocs(void);
bool PullStoreUpToConstAddress(void);
bool CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, GrowingArray<InterCodeBasicBlock*>& body);
bool SingleTailLoopOptimization(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars);

View File

@ -9447,6 +9447,11 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
sop0 = 1; sop1 = 0;
const InterInstruction* sins = sins0; sins0 = sins1; sins1 = sins;
}
else if (!sins0 && !sins1 && ins->mSrc[sop0].mTemp >= 0 && ins->mSrc[sop1].mTemp >= 0 && ins->mDst.mTemp == ins->mSrc[sop0].mTemp)
{
flipop = true;
sop0 = 1; sop1 = 0;
}
}
int sreg0 = ins->mSrc[sop0].mTemp < 0 ? -1 : BC_REG_TMP + proc->mTempOffset[ins->mSrc[sop0].mTemp];
@ -11915,6 +11920,35 @@ void NativeCodeBasicBlock::SignExtendAddImmediate(InterCodeProcedure* proc, cons
mIns.Push(NativeCodeInstruction(ains, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ains->mDst.mTemp] + 1));
}
// Emits a table lookup for a float binary operation between a converted
// integer and a float constant.
//
// cins supplies the integer source and its value range, ins is the binary
// operator whose other operand is the float constant.  The generated code
// loads the low byte of the integer, subtracts the range minimum to form the
// index in X, and copies the four result bytes from the four tables returned
// by AllocateFloatTable into the destination of ins.
void NativeCodeBasicBlock::BinaryFloatOperatorLookup(InterCodeProcedure* proc, const InterInstruction* cins, const InterInstruction* ins)
{
	const int	indexReg = BC_REG_TMP + proc->mTempOffset[cins->mSrc[0].mTemp];

	// X = integer value - range minimum
	mIns.Push(NativeCodeInstruction(cins, ASMIT_LDA, ASMIM_ZERO_PAGE, indexReg + 0));
	mIns.Push(NativeCodeInstruction(cins, ASMIT_SEC));
	mIns.Push(NativeCodeInstruction(cins, ASMIT_SBC, ASMIM_IMMEDIATE, cins->mSrc[0].mRange.mMinValue));
	mIns.Push(NativeCodeInstruction(cins, ASMIT_TAX));

	// The constant is whichever source is not a temporary; with the constant
	// in the second source, subtraction and division use the reversed table
	const bool		constInFirst = ins->mSrc[0].mTemp < 0;
	const double	fconst = constInFirst ? ins->mSrc[0].mFloatConst : ins->mSrc[1].mFloatConst;
	const bool		reverse = !constInFirst && (ins->mOperator == IA_SUB || ins->mOperator == IA_DIVS);

	const int	minIndex = int(cins->mSrc[0].mRange.mMinValue);
	const int	maxIndex = int(cins->mSrc[0].mRange.mMaxValue);
	const int	resultReg = BC_REG_TMP + proc->mTempOffset[ins->mDst.mTemp];

	// One table per byte of the float result
	for (int byteIndex = 0; byteIndex < 4; byteIndex++)
	{
		LinkerObject* table = mProc->mGenerator->AllocateFloatTable(ins->mOperator, reverse, minIndex, maxIndex, float(fconst), byteIndex);

		mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, table));
		mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, resultReg + byteIndex));
	}
}
void NativeCodeBasicBlock::UnaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins)
{
int treg = BC_REG_TMP + proc->mTempOffset[ins->mDst.mTemp];
@ -50174,7 +50208,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
mInterProc = proc;
mInterProc->mLinkerObject->mNativeProc = this;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "benchmark");
CheckFunc = !strcmp(mInterProc->mIdent->mString, "testpow");
int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks];
@ -52353,6 +52387,17 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode
block->SignExtendAddImmediate(iproc, ins, iblock->mInstructions[i + 1]);
i++;
}
else if (i + 1 < iblock->mInstructions.Size() &&
(iproc->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) &&
(ins->mOperator == IA_INT2FLOAT || ins->mOperator == IA_UINT2FLOAT) &&
ins->mSrc[0].IsSByte() && ins->mSrc[0].mRange.mMaxValue - ins->mSrc[0].mRange.mMinValue < 16 &&
iblock->mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR &&
(iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[0].mFinal && iblock->mInstructions[i + 1]->mSrc[1].mTemp < 0 ||
iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[1].mFinal && iblock->mInstructions[i + 1]->mSrc[0].mTemp < 0))
{
block->BinaryFloatOperatorLookup(iproc, ins, iblock->mInstructions[i + 1]);
i++;
}
else
block->NumericConversion(iproc, this, ins);
break;
@ -52688,6 +52733,115 @@ void NativeCodeGenerator::PopulateShortMulTables(void)
}
}
}
// Fill the float lookup tables: for every integer j in the range of a table
// the float result of the operation with the table constant is computed and
// its four bytes are spread over the four linker objects of the table.
for (int i = 0; i < mFloatTables.Size(); i++)
{
	const FloatTable& f(mFloatTables[i]);

	// Size the four byte tables to the final value range if not done yet
	if (f.mLinker[0]->mSize != f.mMaxValue + 1 - f.mMinValue)
	{
		for (int j = 0; j < 4; j++)
			f.mLinker[j]->AddSpace(f.mMaxValue + 1 - f.mMinValue);
	}

	for (int j = f.mMinValue; j <= f.mMaxValue; j++)
	{
		union {
			float	f;
			uint8	u[4];
		}	fu;

		switch (f.mOperator)
		{
		case IA_MUL:
			fu.f = f.mConst * float(j);
			break;
		case IA_ADD:
			// Sum, not product (this case was a copy of IA_MUL)
			fu.f = f.mConst + float(j);
			break;
		case IA_SUB:
			if (f.mReverse)
				fu.f = f.mConst - float(j);
			else
				fu.f = float(j) - f.mConst;
			break;
		case IA_DIVS:
			if (f.mReverse)
				fu.f = f.mConst / float(j);
			else
				fu.f = float(j) / f.mConst;
			break;
		default:
			// No table semantics for other operators; avoid emitting
			// uninitialized bytes
			fu.f = 0.0f;
			break;
		}

		for (int k = 0; k < 4; k++)
			f.mLinker[k]->mData[j - f.mMinValue] = fu.u[k];
	}
}
}
// Returns the linker object that holds byte "index" (0..3) of the float
// lookup table for the given operator, operand order, range minimum and
// float constant.
//
// An existing table is reused when operator, reverse flag, constant and
// minimum match; its maximum is then raised to maxval if needed.  Otherwise
// a new table with four constant data objects in the runtime section is
// registered.  The table contents are not written here.
LinkerObject* NativeCodeGenerator::AllocateFloatTable(InterOperator op, bool reverse, int minval, int maxval, float fval, int index)
{
	// Search for a matching table
	int i = 0;
	while (i < mFloatTables.Size() &&
		(mFloatTables[i].mOperator != op ||
			mFloatTables[i].mReverse != reverse ||
			mFloatTables[i].mConst != fval ||
			mFloatTables[i].mMinValue != minval))
		i++;

	if (i == mFloatTables.Size())
	{
		Location	loc;
		FloatTable	ft;

		// Large enough for the longest name: "%f" of a float needs up to 47
		// characters and "%d" of an int up to 11, which did not fit into the
		// previous 60 byte buffer
		char	name[96];

		const char* base = "";
		switch (op)
		{
		case IA_MUL:
			base = "fmul";
			break;
		case IA_ADD:
			base = "fadd";
			break;
		case IA_SUB:
			if (reverse)
				base = "frsub";
			else
				base = "fsub";
			break;
		case IA_DIVS:
			if (reverse)
				base = "frdiv";
			else
				base = "fdiv";
			break;
		}

		// One data object per byte of the float value
		// NOTE(review): "%f" prints six decimals, so constants that differ only
		// beyond that end up with the same object name - confirm this is fine.
		for (int k = 0; k < 4; k++)
		{
			sprintf_s(name, "__%stab%d_%d_%f", base, k, minval, fval);
			ft.mLinker[k] = mLinker->AddObject(loc, Ident::Unique(name), mRuntimeSection, LOT_DATA);
			ft.mLinker[k]->mFlags |= LOBJF_CONST;
		}

		ft.mOperator = op;
		ft.mReverse = reverse;
		ft.mConst = fval;
		ft.mMinValue = minval;
		ft.mMaxValue = maxval;

		mFloatTables.Push(ft);

		return ft.mLinker[index];
	}
	else
	{
		// Reuse the table, growing its range if this use needs more entries
		if (maxval > mFloatTables[i].mMaxValue)
			mFloatTables[i].mMaxValue = maxval;

		return mFloatTables[i].mLinker[index];
	}
}
LinkerObject* NativeCodeGenerator::AllocateShortMulTable(InterOperator op, int factor, int size, bool msb)

View File

@ -373,6 +373,7 @@ public:
void LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const InterInstruction* lins1, const InterInstruction* lins2, const InterInstruction* mins);
void SignExtendAddImmediate(InterCodeProcedure* proc, const InterInstruction* xins, const InterInstruction* ains);
void BinaryDivModPair(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction* ins1, const InterInstruction* ins2);
void BinaryFloatOperatorLookup(InterCodeProcedure* proc, const InterInstruction* cins, const InterInstruction* ins);
void NumericConversion(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins);
NativeCodeBasicBlock * FillValue(InterCodeProcedure* proc, const InterInstruction* ins, NativeCodeProcedure* nproc);
@ -868,7 +869,17 @@ public:
InterOperator mOperator;
};
// Lookup table for a float operation between an integer from a small range
// and a float constant.
struct FloatTable
{
	LinkerObject	*	mLinker[4];		// one data object per byte of the float result
	float				mConst;			// the float constant operand
	int					mMinValue;		// integer that maps to table index zero
	int					mMaxValue;		// largest integer covered by the table
	InterOperator		mOperator;		// operation the table implements
	bool				mReverse;		// constant is the left operand of sub/div
};
LinkerObject* AllocateShortMulTable(InterOperator op, int factor, int size, bool msb);
LinkerObject* AllocateFloatTable(InterOperator op, bool reverse, int minval, int maxval, float fval, int index);
void PopulateShortMulTables(void);
Runtime& ResolveRuntime(const Ident* ident);
@ -877,8 +888,9 @@ public:
Linker* mLinker;
LinkerSection* mRuntimeSection;
ExpandingArray<Runtime> mRuntime;
ExpandingArray<Runtime> mRuntime;
ExpandingArray<MulTable> mMulTables;
ExpandingArray<FloatTable> mFloatTables;
struct FunctionCall
{