Improve fixed-point arithmetic

2024-07-28 14:41:17 +02:00 · 2024-07-28 14:41:17 +02:00 · 5bb8ca0b89
parent 7ecd6adb20
commit 5bb8ca0b89
7 changed files with 495 additions and 90 deletions
--- a/autotest/autotest.bat
+++ b/autotest/autotest.bat
@ -183,6 +183,9 @@ rem @echo off
@call :test divmod32test.c
@if %errorlevel% neq 0 goto :error

+@call :test fixmathtest.c
+@if %errorlevel% neq 0 goto :error
+
@call :test enumswitch.c
@if %errorlevel% neq 0 goto :error

--- a/autotest/fixmathtest.c
+++ b/autotest/fixmathtest.c
@ -0,0 +1,90 @@
+#include <fixmath.h>
+#include <assert.h>
+#include <stdlib.h>
+
+// Unsigned operands for the fixed tests: small values, powers of two and
+// the values next to the 8 bit and 16 bit limits
+unsigned	tval[] = {
+	1, 2, 16, 128, 255,
+	256, 4096, 32768, 65535
+};
+
+// Checks lmuldiv16u(a, b, c) against a * b / c computed in unsigned long
+// arithmetic, first for the fixed operands in tval, then for random ones.
+void testmuldiv16u(void)
+{
+	for (char n=0; n<9; n++)
+	{
+		unsigned	u = tval[n];
+
+		// A zero factor gives zero
+		assert(lmuldiv16u(u, 0, u) == 0);
+		assert(lmuldiv16u(0, u, u) == 0);
+
+		// Multiplying and dividing by the same value returns the other factor
+		for(char m=0; m<9; m++)
+		{
+			unsigned	v = tval[m];
+
+			assert(lmuldiv16u(u, v, u) == v);
+			assert(lmuldiv16u(v, u, u) == v);
+		}
+	}
+
+	for(int round=0; round<10000; round++)
+	{
+		unsigned	a = rand();
+		unsigned	b = rand();
+		unsigned	c = rand();
+
+		// No division by zero
+		if (c == 0)
+			continue;
+
+		// Only results that fit into 16 bit are compared
+		unsigned long	expected = (unsigned long)a * (unsigned long)b / c;
+		if (expected < 0x10000l)
+			assert(lmuldiv16u(a, b, c) == expected);
+	}
+}
+
+// Signed operands for the fixed tests: positive values and their negations.
+// Declared as int so that they match the int parameters and the int result
+// of lmuldiv16s without relying on unsigned to signed conversions.
+int	ival[] = {
+	1, 2, 16, 128, 255, 256, 4096, 32767, 
+	-1, -2, -16, -128, -255, -256, -4096, -32767
+};
+
+// Checks lmuldiv16s(a, b, c) against a * b / c computed in long arithmetic,
+// first for the fixed operands in ival, then for random ones.
+void testmuldiv16s(void)
+{
+	for (char n=0; n<16; n++)
+	{
+		// A zero factor gives zero
+		assert(lmuldiv16s(ival[n], 0, ival[n]) == 0);
+		assert(lmuldiv16s(0, ival[n], ival[n]) == 0);
+
+		// Multiplying and dividing by the same value returns the other factor
+		for(char m=0; m<16; m++)
+		{
+			assert(lmuldiv16s(ival[n], ival[m], ival[n]) == ival[m]);
+			assert(lmuldiv16s(ival[m], ival[n], ival[n]) == ival[m]);
+		}
+	}
+
+	for(int round=0; round<10000; round++)
+	{
+		int	a = rand();
+		int	b = rand();
+		int	c = rand();
+
+		// Only positive divisors are checked with random operands
+		if (c <= 0)
+			continue;
+
+		// Only results that fit into a signed 16 bit value are compared
+		long	expected = (long)a * (long)b / c;
+		if (expected < -32768 || expected > 32767)
+			continue;
+
+		assert(lmuldiv16s(a, b, c) == expected);
+	}
+}
+
+// Checks lmul4f12s(a, b) against (a * b) >> 12 computed in long arithmetic
+// for random operands whose result fits into a signed 16 bit value.
+void testlmul4f12s(void)
+{
+	for(int round=0; round<20000; round++)
+	{
+		int	a = rand();
+		int	b = rand();
+		long	expected = ((long)a * (long)b) >> 12;
+
+		if (expected < -32768 || expected > 32767)
+			continue;
+
+		assert(lmul4f12s(a, b) == expected);
+	}
+}
+
+// Runs the fixed point arithmetic tests one after the other and returns 0.
+int main(void)
+{
+	// The tests draw their operands from the same rand() sequence, so the
+	// call order determines which operands each test sees
+	testlmul4f12s();
+	testmuldiv16u();
+	testmuldiv16s();
+
+	return 0;
+}
--- a/include/fixmath.c
+++ b/include/fixmath.c
@ -116,31 +116,37 @@ int lmul4f12s(int x, int y)
 {
 	__asm
 	{
-		bit y + 1
-		bpl W0
-
 		sec
-		lda #0
-		sbc y
-		sta y
-		lda #0
-		sbc y + 1
-		sta y + 1
+		lda x
+		ror
+		sta accu

-		sec
-		lda #0
-		sbc x
-		sta x
-		lda #0
-		sbc x + 1
-		sta x + 1
-W0:
-		ldx	#15
 		lda #0
 		sta	accu + 1
+L2:		
+		bcc	W4
+		tay
+		clc
+		lda	accu + 1
+		adc	y
+		sta accu + 1
+		tya
+		adc y + 1
+W4:
+		ror
+		ror accu + 1
+		
+		lsr accu
+		bcc W4
+		bne	L2

-L1:		lsr	x + 1
-		ror	x
+		ldx x + 1
+		stx accu
+
+		ldx	#7
+		lsr	accu
+
+L1:		
 		bcc	W1
 		tay
 		clc
@ -156,7 +162,6 @@ W1:
 		dex
 		bne	L1

-		lsr x
 		bcc W2

 		tay
@ -166,13 +171,22 @@ W1:
 		sta accu + 1
 		tya
 		sbc y + 1
-
-		sec
 W2:
 		ror
 		ror accu + 1
 		ror accu

+		bit y + 1
+		bpl W3
+
+		tax
+		sec
+		lda accu + 1
+		sbc x
+		sta accu + 1
+		txa
+		sbc x + 1
+W3:
 		lsr
 		ror accu + 1
 		ror accu
@ -270,15 +284,17 @@ unsigned lmuldiv16u(unsigned a, unsigned b, unsigned c)
 	__asm
 	{
 			lda	#0
-			sta	__tmp + 0
-			sta	__tmp + 1
 			sta	__tmp + 2
 			sta	__tmp + 3

-			ldx	#16
-	L1:		lsr	a + 1
-			ror	a
+			lda a
+			sec
+	T1:
+			ldy #8
+	L1:		
+			ror
 			bcc	W1
+			tax
 			clc
 			lda	__tmp + 2
 			adc	b
@ -286,20 +302,38 @@ unsigned lmuldiv16u(unsigned a, unsigned b, unsigned c)
 			lda	__tmp + 3
 			adc b + 1
 			sta	__tmp + 3
+			txa
 	W1:
 			ror __tmp + 3
 			ror __tmp + 2
-			ror __tmp + 1
-			ror __tmp
-			dex
+			dey
 			bne	L1
+			ror
+			bcc T2

-			lda	#0
-			sta accu
-			sta accu + 1
+			sta __tmp + 0
+			lda a + 1
+			clc
+			bcc T1

-			ldx #17
+	T2:
+			sec
+	L3:
+			sta __tmp + 1
+			ldx #8
 	L2:
+			rol __tmp + 1
+			rol __tmp + 2
+			rol __tmp + 3
+			bcc W3
+			lda __tmp + 2
+			sbc c
+			tay
+			lda __tmp + 3
+			sbc c + 1			
+			sec
+			bcs W4
+	W3:
 			sec
 			lda __tmp + 2
 			sbc c
@ -307,33 +341,23 @@ unsigned lmuldiv16u(unsigned a, unsigned b, unsigned c)
 			lda __tmp + 3
 			sbc c + 1
 			bcc	W2
+	W4:
 			sta __tmp + 3
 			sty __tmp + 2
 	W2:
-			rol accu
-			rol accu + 1
-
-			asl __tmp
-			rol __tmp + 1
-			rol __tmp + 2
-			rol __tmp + 3
-
 			dex
-			beq E2
-			bcc L2
-
-			lda __tmp + 2
-			sbc c
-			sta __tmp + 2
-			lda __tmp + 3
-			sbc c + 1
-			sta __tmp + 3
-			sec
-			bcs W2
-	E2:
+			bne L2
+			lda __tmp + 1
+			rol
+			bcc T3

+			sta accu + 1
+			lda __tmp + 0
+			clc
+			bcc L3
+	T3:
+			sta accu
 	}
-
 }

 int lmuldiv16s(int a, int b, int c)
@ -358,15 +382,17 @@ int lmuldiv16s(int a, int b, int c)
 	__asm
 	{
 			lda	#0
-			sta	__tmp + 0
-			sta	__tmp + 1
 			sta	__tmp + 2
 			sta	__tmp + 3

-			ldx	#16
-	L1:		lsr	a + 1
-			ror	a
+			lda a
+			sec
+	T1:
+			ldy #8
+	L1:		
+			ror
 			bcc	W1
+			tax
 			clc
 			lda	__tmp + 2
 			adc	b
@ -374,20 +400,38 @@ int lmuldiv16s(int a, int b, int c)
 			lda	__tmp + 3
 			adc b + 1
 			sta	__tmp + 3
+			txa
 	W1:
 			ror __tmp + 3
 			ror __tmp + 2
-			ror __tmp + 1
-			ror __tmp
-			dex
+			dey
 			bne	L1
+			ror
+			bcc T2

-			lda	#0
-			sta accu
-			sta accu + 1
+			sta __tmp + 0
+			lda a + 1
+			clc
+			bcc T1

-			ldx #17
+	T2:
+			sec
+	L3:
+			sta __tmp + 1
+			ldx #8
 	L2:
+			rol __tmp + 1
+			rol __tmp + 2
+			rol __tmp + 3
+			bcc W3
+			lda __tmp + 2
+			sbc c
+			tay
+			lda __tmp + 3
+			sbc c + 1			
+			sec
+			bcs W4
+	W3:
 			sec
 			lda __tmp + 2
 			sbc c
@ -395,30 +439,23 @@ int lmuldiv16s(int a, int b, int c)
 			lda __tmp + 3
 			sbc c + 1
 			bcc	W2
+	W4:
 			sta __tmp + 3
 			sty __tmp + 2
 	W2:
-			rol accu
-			rol accu + 1
-
-			asl __tmp
-			rol __tmp + 1
-			rol __tmp + 2
-			rol __tmp + 3
-
 			dex
-			beq E2
-			bcc L2
+			bne L2
+			lda __tmp + 1
+			rol
+			bcc T3
+
+			sta accu + 1
+			lda __tmp + 0
+			clc
+			bcc L3
+	T3:
+			sta accu

-			lda __tmp + 2
-			sbc c
-			sta __tmp + 2
-			lda __tmp + 3
-			sbc c + 1
-			sta __tmp + 3
-			sec
-			bcs W2
-	E2:
 			lda	sign
 			beq	E1

--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@ -123,6 +123,11 @@ bool IntegerValueRange::IsInvalid(void) const
 	return mMinState == S_BOUND && mMaxState == S_BOUND && mMinValue > mMaxValue;
 }

+// Returns true when both limits are in state S_BOUND and the minimum does
+// not exceed the maximum, i.e. the range is a non empty closed interval.
+bool IntegerValueRange::IsBound(void) const
+{
+	if (mMinState != S_BOUND || mMaxState != S_BOUND)
+		return false;
+
+	return mMinValue <= mMaxValue;
+}
+
 bool IntegerValueRange::IsConstant(void) const
 {
 	return mMinState == S_BOUND && mMaxState == S_BOUND && mMinValue == mMaxValue;
@ -5539,6 +5544,12 @@ void InterCodeBasicBlock::Append(InterInstruction * code)
 	this->mInstructions.Push(code);
 }

+
+// Inserts code directly in front of the last instruction of this block.
+// NOTE(review): presumably the last instruction is the terminating branch
+// and the block is never empty here - confirm with the callers.
+void InterCodeBasicBlock::AppendBeforeBranch(InterInstruction* code)
+{
+	const int	last = mInstructions.Size() - 1;
+
+	mInstructions.Insert(last, code);
+}
+
 const InterInstruction* InterCodeBasicBlock::FindByDst(int dst) const
 {
 	int n = mInstructions.Size() - 1;
@ -14355,6 +14366,55 @@ InterCodeBasicBlock* InterCodeBasicBlock::CheckIsConstBranch(const GrowingInstru
 					nins->mConst = tins[k]->mSrc[0];
 				}
 			}
+			else if (ins->mCode == IC_RELATIONAL_OPERATOR && IsIntegerType(ins->mSrc[0].mType))
+			{
+				IntegerValueRange	v0, v1;
+
+				if (ins->mSrc[0].mTemp < 0)
+					v0.SetLimit(ins->mSrc[0].mIntConst, ins->mSrc[0].mIntConst);
+				else
+				{
+					int k = 0;
+					while (k < tins.Size() && tins[k]->mDst.mTemp != ins->mSrc[0].mTemp)
+						k++;
+					if (k < tins.Size())
+						v0 = tins[k]->mDst.mRange;
+				}
+
+				if (ins->mSrc[1].mTemp < 0)
+					v1.SetLimit(ins->mSrc[1].mIntConst, ins->mSrc[1].mIntConst);
+				else
+				{
+					int k = 0;
+					while (k < tins.Size() && tins[k]->mDst.mTemp != ins->mSrc[1].mTemp)
+						k++;
+					if (k < tins.Size())
+						v1 = tins[k]->mDst.mRange;
+				}
+
+				if (v0.IsBound() && v1.IsBound())
+				{
+					if (ins->mOperator == IA_CMPEQ)
+					{
+						if (v0.IsConstant() && v1.IsConstant() && v1.mMinValue == v0.mMinValue)
+						{
+							nins = new InterInstruction(ins->mLocation, IC_CONSTANT);
+							nins->mDst = ins->mDst;
+							nins->mConst.mType = IT_BOOL;
+							nins->mConst.mIntConst = 1;
+						}
+						else if (v0.mMinValue > v1.mMaxValue || v1.mMinValue > v0.mMaxValue)
+						{
+							nins = new InterInstruction(ins->mLocation, IC_CONSTANT);
+							nins->mDst = ins->mDst;
+							nins->mConst.mType = IT_BOOL;
+							nins->mConst.mIntConst = 0;
+						}
+					}
+				}
+			}
+			else if (ins->mDst.mTemp >= 0 && ins->mDst.mRange.IsBound())
+				nins = ins;

 			if (ins->mDst.mTemp >= 0)
 			{
@ -14425,6 +14485,8 @@ bool InterCodeBasicBlock::ShortcutConstBranches(const GrowingInstructionPtrArray
 							k++;
 					}
 				}
+				else if (ins->mDst.mTemp >= 0 && ins->mDst.mRange.IsBound())
+					nins = ins;

 				if (ins->mCode == IC_STORE && !ins->mVolatile && ins->mSrc[0].mTemp < 0)
 					nins = ins;
@ -16102,6 +16164,43 @@ bool InterCodeBasicBlock::CheapInlining(int & numTemps)
 	return changed;
 }

+// Moves IC_STORE instructions whose first source is a constant (no temp) and
+// whose second source is a temp out of this block and in front of the branch
+// of every predecessor, provided the temp originates from an IC_CONSTANT
+// instruction in each predecessor.  Then continues with the successors.
+// Relies on the visited flags to process each block once.
+// Returns true if a store was moved in this block or in a block reached
+// through its jumps.
+bool InterCodeBasicBlock::PullStoreUpToConstAddress(void)
+{
+	bool	changed = false;
+
+	if (!mVisited)
+	{
+		mVisited = true;
+
+		for (int i = 0; i < mInstructions.Size(); i++)
+		{
+			InterInstruction* ins = mInstructions[i];
+			if (ins->mCode == IC_STORE && ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp >= 0 && CanMoveInstructionBeforeBlock(i))
+			{
+				// Count the leading predecessors that can take the store: they
+				// must have a single successor, otherwise the store would also
+				// run on the path that bypasses this block, and the temp must
+				// come from a constant there
+				int j = 0;
+				while (j < mEntryBlocks.Size())
+				{
+					if (mEntryBlocks[j]->mFalseJump)
+						break;
+
+					InterInstruction* cins = mEntryBlocks[j]->FindTempOrigin(ins->mSrc[1].mTemp);
+					if (!cins || cins->mCode != IC_CONSTANT)
+						break;
+
+					j++;
+				}
+
+				// Without any predecessor the store has no place to go and
+				// must stay, otherwise it would be dropped
+				if (j > 0 && j == mEntryBlocks.Size())
+				{
+					for (int k = 0; k < mEntryBlocks.Size(); k++)
+						mEntryBlocks[k]->AppendBeforeBranch(ins->Clone());
+					changed = true;
+
+					// Revisit index i, it now holds the following instruction
+					mInstructions.Remove(i);
+					i--;
+				}
+			}
+		}
+
+		if (mTrueJump && mTrueJump->PullStoreUpToConstAddress())
+			changed = true;
+		if (mFalseJump && mFalseJump->PullStoreUpToConstAddress())
+			changed = true;
+	}
+
+	return changed;
+}
+
 void InterCodeBasicBlock::RemoveUnusedMallocs(void)
 {
 	if (!mVisited)
@ -21628,7 +21727,7 @@ void InterCodeProcedure::Close(void)
 {
 	GrowingTypeArray	tstack(IT_NONE);

-	CheckFunc = !strcmp(mIdent->mString, "main");
+	CheckFunc = !strcmp(mIdent->mString, "bmu_line");
 	CheckCase = false;

 	mEntryBlock = mBlocks[0];
@ -22415,6 +22514,7 @@ void InterCodeProcedure::Close(void)
 	mEntryBlock->ForwardShortLoadStoreOffsets();
 	DisassembleDebug("ForwardShortLoadStoreOffsets");

+
 //	CollapseDispatch();
 //	DisassembleDebug("CollapseDispatch");

@ -22507,6 +22607,11 @@ void InterCodeProcedure::Close(void)
 	}
 #endif

+	BuildDataFlowSets();
+	ResetVisited();
+	mEntryBlock->PullStoreUpToConstAddress();
+	DisassembleDebug("PullStoreUpToConstAddress");
+
 	ConstLoopOptimization();

 	BuildDataFlowSets();
--- a/oscar64/InterCode.h
+++ b/oscar64/InterCode.h
@ -172,6 +172,7 @@ public:
 	void MergeUnknown(const IntegerValueRange& range);
 	void SetLimit(int64 minValue, int64 maxValue);

+	bool IsBound(void) const;
 	bool IsConstant(void) const;
 	bool IsInvalid(void) const;

@ -407,6 +408,7 @@ public:
 	InterCodeBasicBlock* Clone(void);

 	void Append(InterInstruction * code);
+	void AppendBeforeBranch(InterInstruction* code);
 	const InterInstruction* FindByDst(int dst) const;
 	void Close(InterCodeBasicBlock* trueJump, InterCodeBasicBlock* falseJump);

@ -605,6 +607,8 @@ public:
 	void PropagateMemoryAliasingInfo(const GrowingInstructionPtrArray& tvalue);
 	void RemoveUnusedMallocs(void);

+	bool PullStoreUpToConstAddress(void);
+
 	bool CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, GrowingArray<InterCodeBasicBlock*>& body);

 	bool SingleTailLoopOptimization(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars);
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@ -9447,6 +9447,11 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
 				sop0 = 1; sop1 = 0;
 				const InterInstruction* sins = sins0; sins0 = sins1; sins1 = sins;
 			}
+			else if (!sins0 && !sins1 && ins->mSrc[sop0].mTemp >= 0 && ins->mSrc[sop1].mTemp >= 0 && ins->mDst.mTemp == ins->mSrc[sop0].mTemp)
+			{
+				flipop = true;
+				sop0 = 1; sop1 = 0;
+			}
 		}

 		int	sreg0 = ins->mSrc[sop0].mTemp < 0 ? -1 : BC_REG_TMP + proc->mTempOffset[ins->mSrc[sop0].mTemp];
@ -11915,6 +11920,35 @@ void NativeCodeBasicBlock::SignExtendAddImmediate(InterCodeProcedure* proc, cons
 	mIns.Push(NativeCodeInstruction(ains, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ains->mDst.mTemp] + 1));
 }

+// Emits a table lookup that replaces the conversion cins followed by the
+// binary operator ins with one constant operand.  The low byte of the
+// conversion source minus the minimum of its value range is placed in X and
+// indexes four byte tables obtained from AllocateFloatTable, whose values are
+// stored to the four bytes of the destination of ins.
+void NativeCodeBasicBlock::BinaryFloatOperatorLookup(InterCodeProcedure* proc, const InterInstruction* cins, const InterInstruction* ins)
+{
+	const int	sreg = BC_REG_TMP + proc->mTempOffset[cins->mSrc[0].mTemp];
+	const int	dreg = BC_REG_TMP + proc->mTempOffset[ins->mDst.mTemp];
+
+	// X = source - minimum, the index into the tables
+	mIns.Push(NativeCodeInstruction(cins, ASMIT_LDA, ASMIM_ZERO_PAGE, sreg + 0));
+	mIns.Push(NativeCodeInstruction(cins, ASMIT_SEC));
+	mIns.Push(NativeCodeInstruction(cins, ASMIT_SBC, ASMIM_IMMEDIATE, cins->mSrc[0].mRange.mMinValue));
+	mIns.Push(NativeCodeInstruction(cins, ASMIT_TAX));
+
+	// Pick the constant operand; with the constant as second source the
+	// non commutative operators use the reversed table
+	const bool		constFirst = ins->mSrc[0].mTemp < 0;
+	const double	fconst = constFirst ? ins->mSrc[0].mFloatConst : ins->mSrc[1].mFloatConst;
+	const bool		reverse = !constFirst && (ins->mOperator == IA_SUB || ins->mOperator == IA_DIVS);
+
+	const int		minval = int(cins->mSrc[0].mRange.mMinValue);
+	const int		maxval = int(cins->mSrc[0].mRange.mMaxValue);
+	const float		fval = float(fconst);
+
+	// One table per byte of the float result
+	for (int byte = 0; byte < 4; byte++)
+	{
+		LinkerObject* table = mProc->mGenerator->AllocateFloatTable(ins->mOperator, reverse, minval, maxval, fval, byte);
+
+		mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, table));
+		mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, dreg + byte));
+	}
+}
+
 void NativeCodeBasicBlock::UnaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins)
 {
 	int	treg = BC_REG_TMP + proc->mTempOffset[ins->mDst.mTemp];
@ -50174,7 +50208,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
 	mInterProc = proc;
 	mInterProc->mLinkerObject->mNativeProc = this;

-	CheckFunc = !strcmp(mInterProc->mIdent->mString, "benchmark");
+	CheckFunc = !strcmp(mInterProc->mIdent->mString, "testpow");

 	int	nblocks = proc->mBlocks.Size();
 	tblocks = new NativeCodeBasicBlock * [nblocks];
@ -52353,6 +52387,17 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode
 				block->SignExtendAddImmediate(iproc, ins, iblock->mInstructions[i + 1]);
 				i++;
 			}
+			else if (i + 1 < iblock->mInstructions.Size() &&
+				(iproc->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) &&
+				(ins->mOperator == IA_INT2FLOAT || ins->mOperator == IA_UINT2FLOAT) &&
+				ins->mSrc[0].IsSByte() && ins->mSrc[0].mRange.mMaxValue - ins->mSrc[0].mRange.mMinValue < 16 &&
+				iblock->mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR &&
+				(iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[0].mFinal && iblock->mInstructions[i + 1]->mSrc[1].mTemp < 0 ||
+					iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[1].mFinal && iblock->mInstructions[i + 1]->mSrc[0].mTemp < 0))
+			{
+				block->BinaryFloatOperatorLookup(iproc, ins, iblock->mInstructions[i + 1]);
+				i++;
+			}
 			else
 				block->NumericConversion(iproc, this, ins);
 			break;
@ -52688,6 +52733,115 @@ void NativeCodeGenerator::PopulateShortMulTables(void)
 			}
 		}
 	}
+
+	// Fill the byte tables of all float lookups requested through
+	// AllocateFloatTable: entry j - mMinValue holds the result of the
+	// operator applied to the constant and the integer j
+	for (int i = 0; i < mFloatTables.Size(); i++)
+	{
+		const FloatTable& f(mFloatTables[i]);
+
+		// Size the four byte tables unless they already have one entry per
+		// integer in mMinValue..mMaxValue
+		if (f.mLinker[0]->mSize != f.mMaxValue + 1 - f.mMinValue)
+		{
+			for (int j = 0; j < 4; j++)
+				f.mLinker[j]->AddSpace(f.mMaxValue + 1 - f.mMinValue);
+		}
+
+		for (int j = f.mMinValue; j <= f.mMaxValue; j++)
+		{
+			// Gives access to the individual bytes of the float result
+			union {
+				float	f;
+				uint8	u[4];
+			}	fu;
+
+			switch (f.mOperator)
+			{
+			case IA_MUL:
+				fu.f = f.mConst * float(j);
+				break;
+			case IA_ADD:
+				// Sum, not product: this case used to repeat the IA_MUL formula
+				fu.f = f.mConst + float(j);
+				break;
+			case IA_SUB:
+				if (f.mReverse)
+					fu.f = f.mConst - float(j);
+				else
+					fu.f = float(j) - f.mConst;
+				break;
+			case IA_DIVS:
+				if (f.mReverse)
+					fu.f = f.mConst / float(j);
+				else
+					fu.f = float(j) / f.mConst;
+				break;
+			}
+
+			// Byte k of every result goes into table k
+			for (int k = 0; k < 4; k++)
+				f.mLinker[k]->mData[j - f.mMinValue] = fu.u[k];
+		}
+	}
+}
+
+// Returns the linker object for byte <index> (0..3) of the float lookup table
+// for operator <op> with the constant <fval> and integer operands starting at
+// <minval>.  <reverse> selects the operand order for IA_SUB and IA_DIVS.
+// Requests with the same operator, operand order, constant and minimum share
+// one table, whose upper limit grows to the largest <maxval> requested.
+// The table objects are created here without content.
+LinkerObject* NativeCodeGenerator::AllocateFloatTable(InterOperator op, bool reverse, int minval, int maxval, float fval, int index)
+{
+	// Look for an existing table with matching parameters
+	int	i = 0;
+	while (i < mFloatTables.Size() && 
+		(mFloatTables[i].mOperator != op || 
+		 mFloatTables[i].mReverse != reverse ||
+		 mFloatTables[i].mConst != fval ||
+		 mFloatTables[i].mMinValue != minval))
+		i++;
+
+	if (i == mFloatTables.Size())
+	{
+		Location	loc;
+		FloatTable	ft;
+
+		// "%f" alone can produce 47 characters for a large float, which
+		// together with prefix, index and minimum needs about 72 bytes
+		char	name[96];
+		const char* base = "";
+
+		switch (op)
+		{
+		case IA_MUL:
+			base = "fmul";
+			break;
+		case IA_ADD:
+			base = "fadd";
+			break;
+		case IA_SUB:
+			if (reverse)
+				base = "frsub";
+			else
+				base = "fsub";
+			break;
+		case IA_DIVS:
+			if (reverse)
+				base = "frdiv";
+			else
+				base = "fdiv";
+			break;
+		}
+
+		// One constant data object per byte of the float result
+		for (int k = 0; k < 4; k++)
+		{
+			sprintf_s(name, "__%stab%d_%d_%f", base, k, minval, fval);
+			ft.mLinker[k] = mLinker->AddObject(loc, Ident::Unique(name), mRuntimeSection, LOT_DATA);
+			ft.mLinker[k]->mFlags |= LOBJF_CONST;
+		}
+
+		ft.mOperator = op;
+		ft.mReverse = reverse;
+		ft.mConst = fval;
+		ft.mMinValue = minval;
+		ft.mMaxValue = maxval;
+
+		mFloatTables.Push(ft);
+
+		return ft.mLinker[index];
+	}
+	else
+	{
+		// Extend the shared table if this request reaches further up
+		if (maxval > mFloatTables[i].mMaxValue)
+			mFloatTables[i].mMaxValue = maxval;
+
+		return mFloatTables[i].mLinker[index];
+	}
+}

 LinkerObject* NativeCodeGenerator::AllocateShortMulTable(InterOperator op, int factor, int size, bool msb)
--- a/oscar64/NativeCodeGenerator.h
+++ b/oscar64/NativeCodeGenerator.h
@ -373,6 +373,7 @@ public:
 	void LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const InterInstruction* lins1, const InterInstruction* lins2, const InterInstruction* mins);
 	void SignExtendAddImmediate(InterCodeProcedure* proc, const InterInstruction* xins, const InterInstruction* ains);
 	void BinaryDivModPair(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction* ins1, const InterInstruction* ins2);
+	void BinaryFloatOperatorLookup(InterCodeProcedure* proc, const InterInstruction* cins, const InterInstruction* ins);

 	void NumericConversion(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins);
 	NativeCodeBasicBlock * FillValue(InterCodeProcedure* proc, const InterInstruction* ins, NativeCodeProcedure* nproc);
@ -868,7 +869,17 @@ public:
 		InterOperator		mOperator;
 	};

+	// Lookup table for a float binary operator with one constant operand
+	// and one integer operand from a small value range
+	struct FloatTable
+	{
+		LinkerObject	*	mLinker[4];		// one table object per byte of the float result
+		float				mConst;			// the constant operand
+		int					mMinValue;		// integer operand of the first table entry
+		int					mMaxValue;		// integer operand of the last table entry
+		InterOperator		mOperator;		// operator the table is computed for
+		bool				mReverse;		// constant is the left operand of IA_SUB / IA_DIVS
+	};
+
 	LinkerObject* AllocateShortMulTable(InterOperator op, int factor, int size, bool msb);
+	LinkerObject* AllocateFloatTable(InterOperator op, bool reverse, int minval, int maxval, float fval, int index);
 	void PopulateShortMulTables(void);

 	Runtime& ResolveRuntime(const Ident* ident);
@ -877,8 +888,9 @@ public:
 	Linker* mLinker;
 	LinkerSection* mRuntimeSection;

-	ExpandingArray<Runtime>	mRuntime;
+	ExpandingArray<Runtime>		mRuntime;
 	ExpandingArray<MulTable>	mMulTables;
+	ExpandingArray<FloatTable>	mFloatTables;

 	struct FunctionCall
 	{