Optimize hires line draw

This commit is contained in:
drmortalwombat 2024-12-15 16:34:53 +01:00
parent 50c7e10814
commit 4fce263228
5 changed files with 127 additions and 57 deletions

View File

@ -123,8 +123,9 @@ int lmul4f12s(int x, int y)
lda #0 lda #0
sta accu + 1 sta accu + 1
L2:
bcc W4 bcc W4
L2:
tay tay
clc clc
lda accu + 1 lda accu + 1
@ -165,7 +166,7 @@ W1:
bcc W2 bcc W2
tay tay
sec // sec ; we know it is set here
lda accu + 1 lda accu + 1
sbc y sbc y
sta accu + 1 sta accu + 1

View File

@ -525,7 +525,7 @@ void bm_polygon_nc_fill(const Bitmap * bm, const ClipRect * clip, int * px, int
static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool left, bool up, char pattern, LineOp op) static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool left, bool up, char pattern, LineOp op)
{ {
char ip = 0; char ip = 0;
bool delta16 = ((dx | dy) & 0xff80) != 0; bool delta16 =((dx | dy) & 0xff80) != 0;
// ylow // ylow
ip += asm_im(BLIT_CODE + ip, ASM_LDY, ly); ip += asm_im(BLIT_CODE + ip, ASM_LDY, ly);
@ -579,82 +579,105 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
break; break;
} }
if (dx && dy)
{
// m >= 0
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16);
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, delta16 ? 5 + 15 + 13 + 2 : 5 + 15 + 7 + 2);
}
if (dy) if (dy)
{ {
ip += asm_np(BLIT_CODE + ip, up ? ASM_DEY : ASM_INY); bool delta8 = false;
ip += asm_im(BLIT_CODE + ip, ASM_CPY, up ? 0xff : 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, 15);
ip += asm_np(BLIT_CODE + ip, ASM_CLC); if (dx)
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, up ? 0x07 : 0x00);
}
if (dx && dy)
{
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
if (delta16)
{ {
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1); // m >= 0
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1); char n = delta16 ? 18 + 13 + 2 : 18 + 7 + 2;
if (!up) n++;
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, n);
delta8 = !delta16;
}
if (up)
{
ip += asm_np(BLIT_CODE + ip, ASM_DEY);
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta8 ? 17 : 15);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x07);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
}
else
{
ip += asm_np(BLIT_CODE + ip, ASM_INY);
ip += asm_im(BLIT_CODE + ip, ASM_CPY, 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, delta8 ? 16 : 14);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x00);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
}
if (dx)
{
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
if (delta16)
{
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 13 + 4 + 12);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
}
else
{
// We know regdp to be in the accu at this point
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 5 + 4 + 12);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
}
} }
// m < 0 // m < 0
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta16 ? 4 + 13 + 13 : 4 + 13 + 7);
} }
if (dx) if (dx)
{ {
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0); ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0);
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 13); ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 12);
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0); ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP); ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, left ? 0xf8 : 0x08);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
if (left) if (left)
{ {
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0xf8);
ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2); ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2);
ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1);
} }
else else
{ {
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2); ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2);
ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1); ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1);
} }
}
if (dx && dy) ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
{
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
if (delta16)
{
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
}
} }
// l -- // l --

View File

@ -13613,7 +13613,7 @@ bool InterCodeBasicBlock::MoveTrainCrossBlock(void)
FastNumberSet nset(mEntryRequiredTemps.Size()); FastNumberSet nset(mEntryRequiredTemps.Size());
InterInstruction* ins(mInstructions[i]); InterInstruction* ins(mInstructions[i]);
if (ins->mCode == IC_STORE) if (ins->mCode == IC_STORE && ins->mSrc[0].mFinal)
{ {
for (int k = 0; k < ins->mNumOperands; k++) for (int k = 0; k < ins->mNumOperands; k++)
{ {
@ -22996,7 +22996,7 @@ void InterCodeProcedure::Close(void)
{ {
GrowingTypeArray tstack(IT_NONE); GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "test"); CheckFunc = !strcmp(mIdent->mString, "main");
CheckCase = false; CheckCase = false;
mEntryBlock = mBlocks[0]; mEntryBlock = mBlocks[0];

View File

@ -37694,7 +37694,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
while (ai < mIns.Size() && !mIns[ai].ChangesAccu()) while (ai < mIns.Size() && !mIns[ai].ChangesAccu())
ai++; ai++;
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z)) if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z) && !mEntryRequiredRegs[CPU_REG_A])
{ {
if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE) if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE)
{ {
@ -51811,7 +51811,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
mInterProc = proc; mInterProc = proc;
mInterProc->mLinkerObject->mNativeProc = this; mInterProc->mLinkerObject->mNativeProc = this;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "bmmcu_line"); CheckFunc = !strcmp(mInterProc->mIdent->mString, "main");
int nblocks = proc->mBlocks.Size(); int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks]; tblocks = new NativeCodeBasicBlock * [nblocks];

View File

@ -48,7 +48,7 @@ struct Point
}; };
Point tcorners[8], pcorners[8]; __striped Point tcorners[8], pcorners[8];
void drawCube(void) void drawCube(void)
{ {
@ -77,6 +77,45 @@ void hideCube(void)
} }
} }
void xorCube(void)
{
for(char i=0; i<8; i++)
{
if (!(i & 1))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
if (!(i & 2))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
if (!(i & 4))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
pcorners[i] = tcorners[i];
}
}
void xor2Cube(void)
{
for(char i=0; i<8; i++)
{
if (!(i & 1))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 1].x, pcorners[i | 1].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
}
if (!(i & 2))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 2].x, pcorners[i | 2].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
}
if (!(i & 4))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 4].x, pcorners[i | 4].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
}
}
for(char i=0; i<8; i++)
pcorners[i] = tcorners[i];
}
#if 1 #if 1
F12Vector3 corners[8]; F12Vector3 corners[8];
@ -112,8 +151,15 @@ int main(void)
tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100; tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100;
} }
#if 1
if (k)
xor2Cube();
else
xorCube();
#else
hideCube(); hideCube();
drawCube(); drawCube();
#endif
} }