Optimize hires line draw
This commit is contained in:
parent
50c7e10814
commit
4fce263228
|
@ -123,8 +123,9 @@ int lmul4f12s(int x, int y)
|
||||||
|
|
||||||
lda #0
|
lda #0
|
||||||
sta accu + 1
|
sta accu + 1
|
||||||
L2:
|
|
||||||
bcc W4
|
bcc W4
|
||||||
|
L2:
|
||||||
tay
|
tay
|
||||||
clc
|
clc
|
||||||
lda accu + 1
|
lda accu + 1
|
||||||
|
@ -165,7 +166,7 @@ W1:
|
||||||
bcc W2
|
bcc W2
|
||||||
|
|
||||||
tay
|
tay
|
||||||
sec
|
// sec ; we know it is set here
|
||||||
lda accu + 1
|
lda accu + 1
|
||||||
sbc y
|
sbc y
|
||||||
sta accu + 1
|
sta accu + 1
|
||||||
|
|
|
@ -525,7 +525,7 @@ void bm_polygon_nc_fill(const Bitmap * bm, const ClipRect * clip, int * px, int
|
||||||
static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool left, bool up, char pattern, LineOp op)
|
static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool left, bool up, char pattern, LineOp op)
|
||||||
{
|
{
|
||||||
char ip = 0;
|
char ip = 0;
|
||||||
bool delta16 = ((dx | dy) & 0xff80) != 0;
|
bool delta16 =((dx | dy) & 0xff80) != 0;
|
||||||
|
|
||||||
// ylow
|
// ylow
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_LDY, ly);
|
ip += asm_im(BLIT_CODE + ip, ASM_LDY, ly);
|
||||||
|
@ -579,82 +579,105 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dx && dy)
|
|
||||||
{
|
|
||||||
// m >= 0
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16);
|
|
||||||
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, delta16 ? 5 + 15 + 13 + 2 : 5 + 15 + 7 + 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dy)
|
if (dy)
|
||||||
{
|
{
|
||||||
ip += asm_np(BLIT_CODE + ip, up ? ASM_DEY : ASM_INY);
|
bool delta8 = false;
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_CPY, up ? 0xff : 0x08);
|
|
||||||
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, 15);
|
|
||||||
|
|
||||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
if (dx)
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
|
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
|
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_LDY, up ? 0x07 : 0x00);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dx && dy)
|
|
||||||
{
|
|
||||||
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
|
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
|
||||||
|
|
||||||
if (delta16)
|
|
||||||
{
|
{
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
|
// m >= 0
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8);
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16);
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
|
char n = delta16 ? 18 + 13 + 2 : 18 + 7 + 2;
|
||||||
|
if (!up) n++;
|
||||||
|
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, n);
|
||||||
|
delta8 = !delta16;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (up)
|
||||||
|
{
|
||||||
|
ip += asm_np(BLIT_CODE + ip, ASM_DEY);
|
||||||
|
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta8 ? 17 : 15);
|
||||||
|
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x07);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ip += asm_np(BLIT_CODE + ip, ASM_INY);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_CPY, 0x08);
|
||||||
|
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, delta8 ? 16 : 14);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x00);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) & 0xff);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) >> 8);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dx)
|
||||||
|
{
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
|
||||||
|
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
||||||
|
|
||||||
|
if (delta16)
|
||||||
|
{
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
|
||||||
|
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 13 + 4 + 12);
|
||||||
|
|
||||||
|
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// We know regdp to be in the accu at this point
|
||||||
|
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 5 + 4 + 12);
|
||||||
|
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
|
||||||
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// m < 0
|
// m < 0
|
||||||
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta16 ? 4 + 13 + 13 : 4 + 13 + 7);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dx)
|
if (dx)
|
||||||
{
|
{
|
||||||
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0);
|
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0);
|
||||||
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 13);
|
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 12);
|
||||||
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0);
|
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0);
|
||||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, left ? 0xf8 : 0x08);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
|
||||||
|
|
||||||
if (left)
|
if (left)
|
||||||
{
|
{
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0xf8);
|
||||||
ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2);
|
ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2);
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1);
|
ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0x08);
|
||||||
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2);
|
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2);
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1);
|
ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (dx && dy)
|
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
||||||
{
|
|
||||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
|
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
|
||||||
if (delta16)
|
|
||||||
{
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
|
|
||||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
|
|
||||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// l --
|
// l --
|
||||||
|
|
|
@ -13613,7 +13613,7 @@ bool InterCodeBasicBlock::MoveTrainCrossBlock(void)
|
||||||
FastNumberSet nset(mEntryRequiredTemps.Size());
|
FastNumberSet nset(mEntryRequiredTemps.Size());
|
||||||
|
|
||||||
InterInstruction* ins(mInstructions[i]);
|
InterInstruction* ins(mInstructions[i]);
|
||||||
if (ins->mCode == IC_STORE)
|
if (ins->mCode == IC_STORE && ins->mSrc[0].mFinal)
|
||||||
{
|
{
|
||||||
for (int k = 0; k < ins->mNumOperands; k++)
|
for (int k = 0; k < ins->mNumOperands; k++)
|
||||||
{
|
{
|
||||||
|
@ -22996,7 +22996,7 @@ void InterCodeProcedure::Close(void)
|
||||||
{
|
{
|
||||||
GrowingTypeArray tstack(IT_NONE);
|
GrowingTypeArray tstack(IT_NONE);
|
||||||
|
|
||||||
CheckFunc = !strcmp(mIdent->mString, "test");
|
CheckFunc = !strcmp(mIdent->mString, "main");
|
||||||
CheckCase = false;
|
CheckCase = false;
|
||||||
|
|
||||||
mEntryBlock = mBlocks[0];
|
mEntryBlock = mBlocks[0];
|
||||||
|
|
|
@ -37694,7 +37694,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
|
||||||
while (ai < mIns.Size() && !mIns[ai].ChangesAccu())
|
while (ai < mIns.Size() && !mIns[ai].ChangesAccu())
|
||||||
ai++;
|
ai++;
|
||||||
|
|
||||||
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z))
|
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z) && !mEntryRequiredRegs[CPU_REG_A])
|
||||||
{
|
{
|
||||||
if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE)
|
if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE)
|
||||||
{
|
{
|
||||||
|
@ -51811,7 +51811,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
|
||||||
mInterProc = proc;
|
mInterProc = proc;
|
||||||
mInterProc->mLinkerObject->mNativeProc = this;
|
mInterProc->mLinkerObject->mNativeProc = this;
|
||||||
|
|
||||||
CheckFunc = !strcmp(mInterProc->mIdent->mString, "bmmcu_line");
|
CheckFunc = !strcmp(mInterProc->mIdent->mString, "main");
|
||||||
|
|
||||||
int nblocks = proc->mBlocks.Size();
|
int nblocks = proc->mBlocks.Size();
|
||||||
tblocks = new NativeCodeBasicBlock * [nblocks];
|
tblocks = new NativeCodeBasicBlock * [nblocks];
|
||||||
|
|
|
@ -48,7 +48,7 @@ struct Point
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
Point tcorners[8], pcorners[8];
|
__striped Point tcorners[8], pcorners[8];
|
||||||
|
|
||||||
void drawCube(void)
|
void drawCube(void)
|
||||||
{
|
{
|
||||||
|
@ -77,6 +77,45 @@ void hideCube(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void xorCube(void)
|
||||||
|
{
|
||||||
|
for(char i=0; i<8; i++)
|
||||||
|
{
|
||||||
|
if (!(i & 1))
|
||||||
|
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
|
||||||
|
if (!(i & 2))
|
||||||
|
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
|
||||||
|
if (!(i & 4))
|
||||||
|
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
|
||||||
|
pcorners[i] = tcorners[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void xor2Cube(void)
|
||||||
|
{
|
||||||
|
for(char i=0; i<8; i++)
|
||||||
|
{
|
||||||
|
if (!(i & 1))
|
||||||
|
{
|
||||||
|
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 1].x, pcorners[i | 1].y, 0xff, LINOP_XOR);
|
||||||
|
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
|
||||||
|
}
|
||||||
|
if (!(i & 2))
|
||||||
|
{
|
||||||
|
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 2].x, pcorners[i | 2].y, 0xff, LINOP_XOR);
|
||||||
|
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
|
||||||
|
}
|
||||||
|
if (!(i & 4))
|
||||||
|
{
|
||||||
|
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 4].x, pcorners[i | 4].y, 0xff, LINOP_XOR);
|
||||||
|
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(char i=0; i<8; i++)
|
||||||
|
pcorners[i] = tcorners[i];
|
||||||
|
}
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
|
|
||||||
F12Vector3 corners[8];
|
F12Vector3 corners[8];
|
||||||
|
@ -112,8 +151,15 @@ int main(void)
|
||||||
tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100;
|
tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
if (k)
|
||||||
|
xor2Cube();
|
||||||
|
else
|
||||||
|
xorCube();
|
||||||
|
#else
|
||||||
hideCube();
|
hideCube();
|
||||||
drawCube();
|
drawCube();
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue