Optimize hires line draw
This commit is contained in:
parent
50c7e10814
commit
4fce263228
|
@ -123,8 +123,9 @@ int lmul4f12s(int x, int y)
|
|||
|
||||
lda #0
|
||||
sta accu + 1
|
||||
L2:
|
||||
|
||||
bcc W4
|
||||
L2:
|
||||
tay
|
||||
clc
|
||||
lda accu + 1
|
||||
|
@ -165,7 +166,7 @@ W1:
|
|||
bcc W2
|
||||
|
||||
tay
|
||||
sec
|
||||
// sec ; we know it is set here
|
||||
lda accu + 1
|
||||
sbc y
|
||||
sta accu + 1
|
||||
|
|
|
@ -579,33 +579,51 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
|
|||
break;
|
||||
}
|
||||
|
||||
if (dx && dy)
|
||||
if (dy)
|
||||
{
|
||||
bool delta8 = false;
|
||||
|
||||
if (dx)
|
||||
{
|
||||
// m >= 0
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, delta16 ? 5 + 15 + 13 + 2 : 5 + 15 + 7 + 2);
|
||||
char n = delta16 ? 18 + 13 + 2 : 18 + 7 + 2;
|
||||
if (!up) n++;
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, n);
|
||||
delta8 = !delta16;
|
||||
}
|
||||
|
||||
if (dy)
|
||||
if (up)
|
||||
{
|
||||
ip += asm_np(BLIT_CODE + ip, up ? ASM_DEY : ASM_INY);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_CPY, up ? 0xff : 0x08);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, 15);
|
||||
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_DEY);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta8 ? 17 : 15);
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x07);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_LDY, up ? 0x07 : 0x00);
|
||||
}
|
||||
else
|
||||
{
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_INY);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_CPY, 0x08);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, delta8 ? 16 : 14);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x00);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) & 0xff);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) >> 8);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
|
||||
}
|
||||
|
||||
if (dx && dy)
|
||||
if (dx)
|
||||
{
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
||||
|
||||
|
@ -614,47 +632,52 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
|
|||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 13 + 4 + 12);
|
||||
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// We know regdp to be in the accu at this point
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 5 + 4 + 12);
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
||||
}
|
||||
}
|
||||
|
||||
// m < 0
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta16 ? 4 + 13 + 13 : 4 + 13 + 7);
|
||||
}
|
||||
|
||||
if (dx)
|
||||
{
|
||||
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 13);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 12);
|
||||
|
||||
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0);
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, left ? 0xf8 : 0x08);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
||||
|
||||
if (left)
|
||||
{
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0xf8);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0x08);
|
||||
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (dx && dy)
|
||||
{
|
||||
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
|
||||
if (delta16)
|
||||
{
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
|
||||
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
|
||||
}
|
||||
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
|
||||
}
|
||||
|
||||
// l --
|
||||
|
|
|
@ -13613,7 +13613,7 @@ bool InterCodeBasicBlock::MoveTrainCrossBlock(void)
|
|||
FastNumberSet nset(mEntryRequiredTemps.Size());
|
||||
|
||||
InterInstruction* ins(mInstructions[i]);
|
||||
if (ins->mCode == IC_STORE)
|
||||
if (ins->mCode == IC_STORE && ins->mSrc[0].mFinal)
|
||||
{
|
||||
for (int k = 0; k < ins->mNumOperands; k++)
|
||||
{
|
||||
|
@ -22996,7 +22996,7 @@ void InterCodeProcedure::Close(void)
|
|||
{
|
||||
GrowingTypeArray tstack(IT_NONE);
|
||||
|
||||
CheckFunc = !strcmp(mIdent->mString, "test");
|
||||
CheckFunc = !strcmp(mIdent->mString, "main");
|
||||
CheckCase = false;
|
||||
|
||||
mEntryBlock = mBlocks[0];
|
||||
|
|
|
@ -37694,7 +37694,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
|
|||
while (ai < mIns.Size() && !mIns[ai].ChangesAccu())
|
||||
ai++;
|
||||
|
||||
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z))
|
||||
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z) && !mEntryRequiredRegs[CPU_REG_A])
|
||||
{
|
||||
if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE)
|
||||
{
|
||||
|
@ -51811,7 +51811,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
|
|||
mInterProc = proc;
|
||||
mInterProc->mLinkerObject->mNativeProc = this;
|
||||
|
||||
CheckFunc = !strcmp(mInterProc->mIdent->mString, "bmmcu_line");
|
||||
CheckFunc = !strcmp(mInterProc->mIdent->mString, "main");
|
||||
|
||||
int nblocks = proc->mBlocks.Size();
|
||||
tblocks = new NativeCodeBasicBlock * [nblocks];
|
||||
|
|
|
@ -48,7 +48,7 @@ struct Point
|
|||
};
|
||||
|
||||
|
||||
Point tcorners[8], pcorners[8];
|
||||
__striped Point tcorners[8], pcorners[8];
|
||||
|
||||
void drawCube(void)
|
||||
{
|
||||
|
@ -77,6 +77,45 @@ void hideCube(void)
|
|||
}
|
||||
}
|
||||
|
||||
void xorCube(void)
|
||||
{
|
||||
for(char i=0; i<8; i++)
|
||||
{
|
||||
if (!(i & 1))
|
||||
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
|
||||
if (!(i & 2))
|
||||
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
|
||||
if (!(i & 4))
|
||||
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
|
||||
pcorners[i] = tcorners[i];
|
||||
}
|
||||
}
|
||||
|
||||
void xor2Cube(void)
|
||||
{
|
||||
for(char i=0; i<8; i++)
|
||||
{
|
||||
if (!(i & 1))
|
||||
{
|
||||
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 1].x, pcorners[i | 1].y, 0xff, LINOP_XOR);
|
||||
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
|
||||
}
|
||||
if (!(i & 2))
|
||||
{
|
||||
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 2].x, pcorners[i | 2].y, 0xff, LINOP_XOR);
|
||||
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
|
||||
}
|
||||
if (!(i & 4))
|
||||
{
|
||||
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 4].x, pcorners[i | 4].y, 0xff, LINOP_XOR);
|
||||
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
|
||||
}
|
||||
}
|
||||
|
||||
for(char i=0; i<8; i++)
|
||||
pcorners[i] = tcorners[i];
|
||||
}
|
||||
|
||||
#if 1
|
||||
|
||||
F12Vector3 corners[8];
|
||||
|
@ -112,8 +151,15 @@ int main(void)
|
|||
tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100;
|
||||
}
|
||||
|
||||
#if 1
|
||||
if (k)
|
||||
xor2Cube();
|
||||
else
|
||||
xorCube();
|
||||
#else
|
||||
hideCube();
|
||||
drawCube();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue