Fix floating point native code generation

2021-09-09 09:42:02 +02:00 · 2021-09-09 09:42:02 +02:00 · 3442ee507a
parent fcedf69aff
commit 3442ee507a
4 changed files with 1089 additions and 471 deletions
--- a/include/crt.c
+++ b/include/crt.c
@ -1767,7 +1767,7 @@ split_exp:
 		sta	tmp + 2
 		lda	$03, x
 		sta	tmp + 3
-
+split_texp:
 		lda	tmp + 2
 		asl	
 		lda	tmp + 3
@ -1805,10 +1805,8 @@ W1:
 		rts
 }
-__asm inp_binop_add_f32
+__asm faddsub
 {
 		jsr	freg.split_exp
 faddsub:
 		sec
 		lda	tmp + 4
 		sbc	tmp + 5
@ -1881,7 +1879,7 @@ fas_done:
 		and	#$7f
 		sta	accu + 2
 W2:				
-		jmp	startup.exec
+		rts
 fas_sub:	
 		sec
@ -1929,6 +1927,15 @@ fas_zero:
 		sta	accu + 1
 		sta	accu + 2
 		sta	accu + 3
 		rts
 }	
 // Entry point named for the f32 add binary operation: runs freg.split_exp,
 // calls the faddsub routine as a subroutine, then jumps to startup.exec.
 // NOTE(review): presumably startup.exec is the bytecode interpreter's
 // dispatch loop - confirm against the startup block.
 __asm inp_binop_add_f32
 {
 		jsr	freg.split_exp
 		jsr faddsub
 		jmp	startup.exec
 }
@ -1940,7 +1947,8 @@ __asm inp_binop_sub_f32
 		lda	tmp + 3
 		eor	#$80
 		sta	tmp + 3
-		jmp	inp_binop_add_f32.faddsub
+		jsr	faddsub
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_BINOP_SUB_F32, inp_binop_sub_f32)
@ -1979,16 +1987,14 @@ W1:
 		rts
 }
-__asm inp_binop_mul_f32
+__asm fmul
 {
 		jsr	freg.split_exp
 		lda	accu
 		ora	accu + 1
 		ora	accu + 2
 		bne	W1
 		sta	accu + 3
-		jmp	startup.exec
+		rts
 W1:
 		lda	tmp
 		ora	tmp + 1
@ -1998,7 +2004,7 @@ W1:
 		sta	accu + 1
 		sta	accu + 2
 		sta	accu + 3
-		jmp	startup.exec
+		rts
 W2:	
 		lda	#0
 		sta	tmp + 6
@ -2043,21 +2049,24 @@ W3:		and	#$7f
 		sta	accu + 1
 		lda	tmp + 6
 		sta	accu
 		rts
 }
 // Entry point bound to BC_BINOP_MUL_F32 by the #pragma bytecode in this file:
 // runs freg.split_exp, calls the fmul routine as a subroutine, then jumps
 // to startup.exec.
 __asm inp_binop_mul_f32
 {
 		jsr	freg.split_exp
 		jsr fmul
 		jmp	startup.exec
 }
-#pragma	bytecode(BC_BINOP_MUL_F32, inp_binop_mul_f32)
+__asm fdiv
 __asm inp_binop_div_f32
 {
 		jsr	freg.split_exp
 		lda	accu
 		ora	accu + 1
 		ora	accu + 2
 		bne	W1
 		sta	accu + 3
-		jmp	startup.exec
+		rts
 W1:
 		lda	accu + 3
 		eor	tmp + 3
@ -2131,12 +2140,84 @@ W4:
 		sta	accu + 1
 		lda	tmp + 6
 		sta	accu
 		rts
 }
 #pragma	bytecode(BC_BINOP_MUL_F32, inp_binop_mul_f32)
 // Entry point bound to BC_BINOP_DIV_F32 by the #pragma bytecode that follows:
 // runs freg.split_exp, calls the fdiv routine as a subroutine, then jumps
 // to startup.exec.
 __asm inp_binop_div_f32
 {
 		jsr	freg.split_exp
 		jsr fdiv
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_BINOP_DIV_F32, inp_binop_div_f32)
 // Compares the 4-byte value in accu with the 4-byte value in tmp and returns
 // the outcome in A: 0 (fcmpeq), 1 (fcmpgt) or $ff (fcmplt). Returns with rts,
 // so it is callable with jsr.
 // NOTE(review): presumably both operands are packed sign/magnitude floats
 // with the sign in bit 7 of byte 3 - confirm. Under that assumption the code
 // yields 1 when accu orders below tmp and $ff when accu orders above tmp.
 __asm fcmp
 {
 		// isolate the sign bits: zero result means both signs are equal
 		lda	accu + 3
 		eor	tmp + 3
 		and	#$80
 		beq	W1
 		// different sign, check zero case
 		// accu is "zero" when every bit except the sign bit is clear
 		lda	accu + 3
 		and	#$7f
 		ora	accu + 2
 		ora	accu + 1
 		ora	accu
 		bne	W2
 		// accu is zero: if tmp is zero as well (sign ignored) report equal
 		lda	tmp + 3
 		and	#$7f
 		ora	tmp + 2
 		ora	tmp + 1
 		ora	tmp + 0
 		beq	fcmpeq
 		// signs differ and at least one operand is non-zero:
 		// the sign bit of accu alone decides the result
 W2:		lda	accu + 3
 		bmi	fcmpgt
 		bpl	fcmplt		
 W1:		
 		// same sign
 		// compare byte by byte from byte 3 down to byte 0; the first
 		// difference branches to W3 with the carry left by that cmp
 		lda	accu + 3
 		cmp	tmp + 3
 		bne	W3
 		lda	accu + 2
 		cmp	tmp + 2
 		bne	W3
 		lda	accu + 1
 		cmp	tmp + 1
 		bne	W3
 		lda	accu
 		cmp	tmp
 		bne	W3
 		// all four bytes match
 fcmpeq:
 		lda	#0
 		rts
 		// carry set: the differing accu byte is above the tmp byte -> W4
 W3:		bcs	W4
 		// accu byte below tmp byte: result flips when accu's sign bit is set
 		bit	accu + 3
 		bmi	fcmplt
 fcmpgt:
 		lda	#1
 		rts
 		// accu byte above tmp byte: result flips when accu's sign bit is set
 W4:		bit	accu + 3
 		bmi	fcmpgt
 fcmplt:
 		lda	#$ff
 		rts
 }
 __asm inp_binop_cmp_f32
 {
 		lda	(ip), y
@ -2223,7 +2304,7 @@ __asm inp_op_negate_f32
 #pragma	bytecode(BC_OP_NEGATE_F32, inp_op_negate_f32)
-__asm uin16_to_float
+__asm uint16_to_float
 {
 		lda	accu
 		ora	accu + 1
@ -2256,20 +2337,11 @@ W2:
 		rts
 }
-__asm inp_conv_u16_f32	
+__asm sint16_to_float
 {
 		jsr	uin16_to_float
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_CONV_U16_F32, inp_conv_u16_f32)
 __asm inp_conv_i16_f32		
 {
 		bit	accu + 1
 		bmi	W1
-		jsr	uin16_to_float
+		jmp	uint16_to_float
 		jmp	startup.exec
 W1:		
 		sec
 		lda	#0
@ -2278,16 +2350,31 @@ W1:
 		lda	#0
 		sbc	accu + 1
 		sta	accu + 1	
-		jsr	uin16_to_float
+		jsr	uint16_to_float
 		lda	accu + 3
 		ora	#$80
 		sta	accu + 3
 		rts
 }
 // Entry point bound to BC_CONV_U16_F32 by the #pragma bytecode that follows:
 // calls uint16_to_float as a subroutine, then jumps to startup.exec.
 __asm inp_conv_u16_f32	
 {
 		jsr	uint16_to_float
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_CONV_U16_F32, inp_conv_u16_f32)
 // Entry point bound to BC_CONV_I16_F32 by the #pragma bytecode that follows:
 // calls sint16_to_float as a subroutine, then jumps to startup.exec.
 __asm inp_conv_i16_f32		
 {
 		jsr	sint16_to_float
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_CONV_I16_F32, inp_conv_i16_f32)
-__asm inp_conv_f32_i16
+__asm f32_to_i16
 {
 		jsr	freg.split_aexp
 		lda	tmp + 4
@ -2296,7 +2383,7 @@ __asm inp_conv_f32_i16
 		lda	#0
 		sta	accu
 		sta	accu + 1
-		jmp	startup.exec
+		rts
 W1:
 		sec
 		sbc	#$8e
@ -2324,18 +2411,24 @@ W3:
 		lda	#0
 		sbc	accu + 2
 		sta	accu + 1
-		jmp	startup.exec
+		rts
 W4:
 		lda	accu + 1
 		sta	accu
 		lda	accu + 2
 		sta	accu + 1
 		rts
 }
 // Entry point bound to BC_CONV_F32_I16 by the #pragma bytecode that follows:
 // calls f32_to_i16 as a subroutine, then jumps to startup.exec.
 __asm inp_conv_f32_i16
 {
 		jsr	f32_to_i16
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_CONV_F32_I16, inp_conv_f32_i16)
-__asm inp_conv_f32_u16
+__asm f32_to_u16
 {
 		jsr	freg.split_aexp
 		lda	tmp + 4
@ -2344,7 +2437,7 @@ __asm inp_conv_f32_u16
 		lda	#0
 		sta	accu
 		sta	accu + 1
-		jmp	startup.exec
+		rts
 W1:
 		sec
 		sbc	#$8e
@ -2353,7 +2446,7 @@ W1:
 		lda	#$ff
 		sta	accu
 		sta	accu + 1
-		jmp	startup.exec
+		rts
 W3:
 		tax
 L1:
@ -2367,6 +2460,25 @@ W2:
 		lda	accu + 2
 		sta	accu + 1
 		rts
 }
 #pragma runtime(fsplita, freg.split_aexp)
 #pragma runtime(fsplitt, freg.split_texp)
 #pragma runtime(fmergea, freg.merge_aexp)
 #pragma runtime(faddsub, faddsub)
 #pragma runtime(fmul, fmul)
 #pragma runtime(fdiv, fdiv)
 #pragma runtime(fcmp, fcmp)
 #pragma runtime(ffromi, sint16_to_float)
 #pragma runtime(ffromu, uint16_to_float)
 #pragma runtime(ftoi, f32_to_i16)
 #pragma runtime(ftou, f32_to_u16)
 // Entry point named for the f32 -> u16 conversion: calls f32_to_u16 as a
 // subroutine, then jumps to startup.exec.
 // NOTE(review): the matching #pragma bytecode is not visible in this hunk -
 // confirm it still follows this block.
 __asm inp_conv_f32_u16
 {
 		jsr f32_to_u16
 		jmp	startup.exec
 }
@ -2385,13 +2497,23 @@ __asm inp_op_abs_f32
 unsigned char ubitmask[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
 __asm fround {
 ffloor:
 		bit	accu + 3
 		bpl	frdown
 		bmi	frup
 fceil:
 		bit	accu + 3
 		bmi	frdown
 		bpl	frup
 frdzero:
 		lda	#0
 		sta	accu
 		sta	accu + 1
 		sta	accu + 2
 		sta	accu + 3
-		jmp	startup.exec
+		rts
 frdown:
 		lda	tmp + 4
@ -2437,8 +2559,7 @@ frd2:
 		jmp	frd3
 frd3:
-		jsr	freg.merge_aexp
+		jmp	freg.merge_aexp
 		jmp	startup.exec
 frone:
 		lda	#$7f
@ -2448,8 +2569,8 @@ frone:
 		sta	accu + 1
 		lda	#$80
 		sta	accu + 2
-		jsr	freg.merge_aexp
+		jmp	freg.merge_aexp
-		jmp	startup.exec
+
 frup:
 		lda	accu
 		ora	accu + 1
@ -2528,17 +2649,32 @@ fru2:
 W3:		sta	accu + 2		
 		jmp	frdown		
 fru3:
-		jsr	freg.merge_aexp
+		jmp	freg.merge_aexp
 		jmp	startup.exec
 }
 #pragma runtime(fsplita, freg.split_aexp)
 #pragma runtime(fsplitt, freg.split_texp)
 #pragma runtime(fmergea, freg.merge_aexp)
 #pragma runtime(faddsub, faddsub)
 #pragma runtime(fmul, fmul)
 #pragma runtime(fdiv, fdiv)
 #pragma runtime(fcmp, fcmp)
 #pragma runtime(ffromi, sint16_to_float)
 #pragma runtime(ffromu, uint16_to_float)
 #pragma runtime(ftoi, f32_to_i16)
 #pragma runtime(ftou, f32_to_u16)
 #pragma runtime(ffloor, fround.ffloor)
 #pragma runtime(fceil, fround.fceil)
 __asm inp_op_floor_f32
 {
 		jsr	freg.split_aexp
 		bit	accu + 3
 		bpl	W1
-		jmp	fround.frup
+		jsr	fround.frup
-W1:		jmp	fround.frdown
+		jmp	startup.exec
 W1:		jsr	fround.frdown
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_OP_FLOOR_F32, inp_op_floor_f32)
@ -2549,8 +2685,10 @@ __asm inp_op_ceil_f32
 		jsr	freg.split_aexp
 		bit	accu + 3
 		bpl	W1
-		jmp	fround.frdown
+		jsr	fround.frdown
-W1:		jmp	fround.frup		
+		jmp	startup.exec
 W1:		jsr	fround.frup		
 		jmp	startup.exec
 }
 #pragma	bytecode(BC_OP_CEIL_F32, inp_op_ceil_f32)
--- a/oscar64/ByteCodeGenerator.cpp
+++ b/oscar64/ByteCodeGenerator.cpp
@ -2540,7 +2540,7 @@ ByteCodeBasicBlock* ByteCodeBasicBlock::BypassEmptyBlocks(void)
 {
 	if (mBypassed)
 		return this;
-	else if (!mFalseJump && mCode.Size() == 0)
+	else if (!mFalseJump && mCode.Size() == 0 && this != mTrueJump)
 		return mTrueJump->BypassEmptyBlocks();
 	else
 	{
@ -2558,7 +2558,7 @@ ByteCodeBasicBlock* ByteCodeBasicBlock::BypassEmptyBlocks(void)
 void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, uint8 * target)
 {
 	int i;
-	int next;
+	int next, end;
 	int pos, at;
 	uint8 b;
@ -2573,7 +2573,8 @@ void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, uint8 * target)
 			generator->mRelocations.Push(rl);
 		}
-		next = mOffset + mCode.Size();
+		end = mOffset + mCode.Size();
 		next = mOffset + mSize;
 		if (mFalseJump)
 		{
@ -2581,29 +2582,29 @@ void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, uint8 * target)
 			{
 				if (mTrueJump->mOffset <= mOffset)
 				{
-					next += PutBranch(generator, mBranch, mTrueJump->mOffset - next);
+					end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
-					next += PutBranch(generator, BC_JUMPS, mFalseJump->mOffset - next);
+					end += PutBranch(generator, BC_JUMPS, mFalseJump->mOffset - end);
 				}
 				else
 				{
-					next += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - next);
+					end += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - end);
 				}
 			}
 			else
 			{
-				next += PutBranch(generator, mBranch, mTrueJump->mOffset - next);
+				end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
 			}
 		}
 		else if (mTrueJump)
 		{
 			if (mTrueJump->mOffset != next)
 			{
-				next += PutBranch(generator, BC_JUMPS, mTrueJump->mOffset - next);
+				end += PutBranch(generator, BC_JUMPS, mTrueJump->mOffset - end);
 			}
 		}
-		assert(next - mOffset == mSize);
+		assert(end == next);
 		for (i = 0; i < mCode.Size(); i++)
 		{
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
--- a/oscar64/NativeCodeGenerator.h
+++ b/oscar64/NativeCodeGenerator.h
@ -63,6 +63,7 @@ public:
 	void UnaryOperator(InterCodeProcedure* proc, const InterInstruction& ins);
 	void RelationalOperator(InterCodeProcedure* proc, const InterInstruction& ins, NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock * falseJump);
 	void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction& ins);
 	void NumericConversion(InterCodeProcedure* proc, const InterInstruction& ins);
 };
 class NativeCodeProcedure