diff --git a/include/fixmath.c b/include/fixmath.c index 7624dbc..23d194e 100644 --- a/include/fixmath.c +++ b/include/fixmath.c @@ -123,8 +123,9 @@ int lmul4f12s(int x, int y) lda #0 sta accu + 1 -L2: + bcc W4 +L2: tay clc lda accu + 1 @@ -165,7 +166,7 @@ W1: bcc W2 tay - sec +// sec ; we know it is set here lda accu + 1 sbc y sta accu + 1 diff --git a/include/gfx/bitmap.c b/include/gfx/bitmap.c index f25f788..ad796e0 100644 --- a/include/gfx/bitmap.c +++ b/include/gfx/bitmap.c @@ -525,7 +525,7 @@ void bm_polygon_nc_fill(const Bitmap * bm, const ClipRect * clip, int * px, int static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool left, bool up, char pattern, LineOp op) { char ip = 0; - bool delta16 = ((dx | dy) & 0xff80) != 0; + bool delta16 =((dx | dy) & 0xff80) != 0; // ylow ip += asm_im(BLIT_CODE + ip, ASM_LDY, ly); @@ -579,82 +579,105 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool break; } - if (dx && dy) - { - // m >= 0 - ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16); - ip += asm_rl(BLIT_CODE + ip, ASM_BMI, delta16 ? 5 + 15 + 13 + 2 : 5 + 15 + 7 + 2); - } - if (dy) { - ip += asm_np(BLIT_CODE + ip, up ? ASM_DEY : ASM_INY); - ip += asm_im(BLIT_CODE + ip, ASM_CPY, up ? 0xff : 0x08); - ip += asm_rl(BLIT_CODE + ip, ASM_BNE, 15); + bool delta8 = false; - ip += asm_np(BLIT_CODE + ip, ASM_CLC); - ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP); - ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff); - ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP); - ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1); - ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8); - ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1); - ip += asm_im(BLIT_CODE + ip, ASM_LDY, up ? 0x07 : 0x00); - } - - if (dx && dy) - { - ip += asm_np(BLIT_CODE + ip, ASM_SEC); - ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP); - ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff); - ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP); - - if (delta16) + if (dx) { - ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1); - ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8); - ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1); + // m >= 0 + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16); + char n = delta16 ? 18 + 13 + 2 : 18 + 7 + 2; + if (!up) n++; + ip += asm_rl(BLIT_CODE + ip, ASM_BMI, n); + delta8 = !delta16; + } + + if (up) + { + ip += asm_np(BLIT_CODE + ip, ASM_DEY); + ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta8 ? 17 : 15); + ip += asm_np(BLIT_CODE + ip, ASM_CLC); + ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x07); + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP); + ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP); + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1); + ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1); + } + else + { + ip += asm_np(BLIT_CODE + ip, ASM_INY); + ip += asm_im(BLIT_CODE + ip, ASM_CPY, 0x08); + ip += asm_rl(BLIT_CODE + ip, ASM_BNE, delta8 ? 16 : 14); + ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x00); + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP); + ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) & 0xff); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP); + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1); + ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) >> 8); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1); + } + + if (dx) + { + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP); + ip += asm_np(BLIT_CODE + ip, ASM_SEC); + ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP); + + if (delta16) + { + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1); + ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1); + ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 13 + 4 + 12); + + ip += asm_np(BLIT_CODE + ip, ASM_CLC); + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP); + ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP); + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1); + ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1); + } + else + { + // We know regdp to be in the accu at this point + ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 5 + 4 + 12); + ip += asm_np(BLIT_CODE + ip, ASM_CLC); + ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff); + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP); + } } // m < 0 - ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta16 ? 4 + 13 + 13 : 4 + 13 + 7); } if (dx) { ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0); - ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 13); + ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 12); ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0); - ip += asm_np(BLIT_CODE + ip, ASM_CLC); + ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP); - ip += asm_im(BLIT_CODE + ip, ASM_ADC, left ? 0xf8 : 0x08); - ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP); if (left) { + ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0xf8); ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2); ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1); } else { + ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0x08); ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2); ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1); } - } - if (dx && dy) - { - ip += asm_np(BLIT_CODE + ip, ASM_CLC); - ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP); - ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff); - ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP); - if (delta16) - { - ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1); - ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8); - ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1); - } + ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP); } // l -- diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index ee20015..019cd04 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -13613,7 +13613,7 @@ bool InterCodeBasicBlock::MoveTrainCrossBlock(void) FastNumberSet nset(mEntryRequiredTemps.Size()); InterInstruction* ins(mInstructions[i]); - if (ins->mCode == IC_STORE) + if (ins->mCode == IC_STORE && ins->mSrc[0].mFinal) { for (int k = 0; k < ins->mNumOperands; k++) { @@ -22996,7 +22996,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "test"); + CheckFunc = !strcmp(mIdent->mString, "main"); CheckCase = false; mEntryBlock = mBlocks[0]; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 99ab5d3..682cdff 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -37694,7 +37694,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc while (ai < mIns.Size() && !mIns[ai].ChangesAccu()) ai++; - if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z)) + if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z) && !mEntryRequiredRegs[CPU_REG_A]) { if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE) { @@ -51811,7 +51811,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) mInterProc = proc; mInterProc->mLinkerObject->mNativeProc = this; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "bmmcu_line"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "main"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; diff --git a/samples/hires/cube3d.c b/samples/hires/cube3d.c index 6ef119b..5c5f0f7 100644 --- a/samples/hires/cube3d.c +++ b/samples/hires/cube3d.c @@ -48,7 +48,7 @@ struct Point }; -Point tcorners[8], pcorners[8]; +__striped Point tcorners[8], pcorners[8]; void drawCube(void) { @@ -77,6 +77,45 @@ void hideCube(void) } } +void xorCube(void) +{ + for(char i=0; i<8; i++) + { + if (!(i & 1)) + bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR); + if (!(i & 2)) + bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR); + if (!(i & 4)) + bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR); + pcorners[i] = tcorners[i]; + } +} + +void xor2Cube(void) +{ + for(char i=0; i<8; i++) + { + if (!(i & 1)) + { + bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 1].x, pcorners[i | 1].y, 0xff, LINOP_XOR); + bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR); + } + if (!(i & 2)) + { + bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 2].x, pcorners[i | 2].y, 0xff, LINOP_XOR); + bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR); + } + if (!(i & 4)) + { + bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 4].x, pcorners[i | 4].y, 0xff, LINOP_XOR); + bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR); + } + } + + for(char i=0; i<8; i++) + pcorners[i] = tcorners[i]; +} + #if 1 F12Vector3 corners[8]; @@ -112,8 +151,15 @@ int main(void) tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100; } +#if 1 + if (k) + xor2Cube(); + else + xorCube(); +#else hideCube(); drawCube(); +#endif }