Optimize hires line draw

This commit is contained in:
drmortalwombat 2024-12-15 16:34:53 +01:00
parent 50c7e10814
commit 4fce263228
5 changed files with 127 additions and 57 deletions

View File

@ -123,8 +123,9 @@ int lmul4f12s(int x, int y)
lda #0 lda #0
sta accu + 1 sta accu + 1
L2:
bcc W4 bcc W4
L2:
tay tay
clc clc
lda accu + 1 lda accu + 1
@ -165,7 +166,7 @@ W1:
bcc W2 bcc W2
tay tay
sec // sec ; we know it is set here
lda accu + 1 lda accu + 1
sbc y sbc y
sta accu + 1 sta accu + 1

View File

@ -579,33 +579,51 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
break; break;
} }
if (dx && dy) if (dy)
{
bool delta8 = false;
if (dx)
{ {
// m >= 0 // m >= 0
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16);
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, delta16 ? 5 + 15 + 13 + 2 : 5 + 15 + 7 + 2); char n = delta16 ? 18 + 13 + 2 : 18 + 7 + 2;
if (!up) n++;
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, n);
delta8 = !delta16;
} }
if (dy) if (up)
{ {
ip += asm_np(BLIT_CODE + ip, up ? ASM_DEY : ASM_INY); ip += asm_np(BLIT_CODE + ip, ASM_DEY);
ip += asm_im(BLIT_CODE + ip, ASM_CPY, up ? 0xff : 0x08); ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta8 ? 17 : 15);
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, 15);
ip += asm_np(BLIT_CODE + ip, ASM_CLC); ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x07);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff); ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP); ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8); ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, up ? 0x07 : 0x00); }
else
{
ip += asm_np(BLIT_CODE + ip, ASM_INY);
ip += asm_im(BLIT_CODE + ip, ASM_CPY, 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, delta8 ? 16 : 14);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x00);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
} }
if (dx && dy) if (dx)
{ {
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff); ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP); ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
@ -614,47 +632,52 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8); ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 13 + 4 + 12);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
}
else
{
// We know regdp to be in the accu at this point
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 5 + 4 + 12);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
}
} }
// m < 0 // m < 0
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta16 ? 4 + 13 + 13 : 4 + 13 + 7);
} }
if (dx) if (dx)
{ {
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0); ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0);
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 13); ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 12);
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0); ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, left ? 0xf8 : 0x08);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
if (left) if (left)
{ {
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0xf8);
ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2); ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2);
ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1);
} }
else else
{ {
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2); ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2);
ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1);
} }
}
if (dx && dy) ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
{
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
if (delta16)
{
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
}
} }
// l -- // l --

View File

@ -13613,7 +13613,7 @@ bool InterCodeBasicBlock::MoveTrainCrossBlock(void)
FastNumberSet nset(mEntryRequiredTemps.Size()); FastNumberSet nset(mEntryRequiredTemps.Size());
InterInstruction* ins(mInstructions[i]); InterInstruction* ins(mInstructions[i]);
if (ins->mCode == IC_STORE) if (ins->mCode == IC_STORE && ins->mSrc[0].mFinal)
{ {
for (int k = 0; k < ins->mNumOperands; k++) for (int k = 0; k < ins->mNumOperands; k++)
{ {
@ -22996,7 +22996,7 @@ void InterCodeProcedure::Close(void)
{ {
GrowingTypeArray tstack(IT_NONE); GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "test"); CheckFunc = !strcmp(mIdent->mString, "main");
CheckCase = false; CheckCase = false;
mEntryBlock = mBlocks[0]; mEntryBlock = mBlocks[0];

View File

@ -37694,7 +37694,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
while (ai < mIns.Size() && !mIns[ai].ChangesAccu()) while (ai < mIns.Size() && !mIns[ai].ChangesAccu())
ai++; ai++;
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z)) if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z) && !mEntryRequiredRegs[CPU_REG_A])
{ {
if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE) if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE)
{ {
@ -51811,7 +51811,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
mInterProc = proc; mInterProc = proc;
mInterProc->mLinkerObject->mNativeProc = this; mInterProc->mLinkerObject->mNativeProc = this;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "bmmcu_line"); CheckFunc = !strcmp(mInterProc->mIdent->mString, "main");
int nblocks = proc->mBlocks.Size(); int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks]; tblocks = new NativeCodeBasicBlock * [nblocks];

View File

@ -48,7 +48,7 @@ struct Point
}; };
Point tcorners[8], pcorners[8]; __striped Point tcorners[8], pcorners[8];
void drawCube(void) void drawCube(void)
{ {
@ -77,6 +77,45 @@ void hideCube(void)
} }
} }
void xorCube(void)
{
for(char i=0; i<8; i++)
{
if (!(i & 1))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
if (!(i & 2))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
if (!(i & 4))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
pcorners[i] = tcorners[i];
}
}
void xor2Cube(void)
{
for(char i=0; i<8; i++)
{
if (!(i & 1))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 1].x, pcorners[i | 1].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
}
if (!(i & 2))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 2].x, pcorners[i | 2].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
}
if (!(i & 4))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 4].x, pcorners[i | 4].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
}
}
for(char i=0; i<8; i++)
pcorners[i] = tcorners[i];
}
#if 1 #if 1
F12Vector3 corners[8]; F12Vector3 corners[8];
@ -112,8 +151,15 @@ int main(void)
tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100; tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100;
} }
#if 1
if (k)
xor2Cube();
else
xorCube();
#else
hideCube(); hideCube();
drawCube(); drawCube();
#endif
} }