Optimize hires line draw

This commit is contained in:
drmortalwombat 2024-12-15 16:34:53 +01:00
parent 50c7e10814
commit 4fce263228
5 changed files with 127 additions and 57 deletions

View File

@ -123,8 +123,9 @@ int lmul4f12s(int x, int y)
lda #0
sta accu + 1
L2:
bcc W4
L2:
tay
clc
lda accu + 1
@ -165,7 +166,7 @@ W1:
bcc W2
tay
sec
// sec ; we know it is set here
lda accu + 1
sbc y
sta accu + 1

View File

@ -525,7 +525,7 @@ void bm_polygon_nc_fill(const Bitmap * bm, const ClipRect * clip, int * px, int
static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool left, bool up, char pattern, LineOp op)
{
char ip = 0;
bool delta16 = ((dx | dy) & 0xff80) != 0;
bool delta16 =((dx | dy) & 0xff80) != 0;
// ylow
ip += asm_im(BLIT_CODE + ip, ASM_LDY, ly);
@ -579,33 +579,51 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
break;
}
if (dx && dy)
if (dy)
{
bool delta8 = false;
if (dx)
{
// m >= 0
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + delta16);
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, delta16 ? 5 + 15 + 13 + 2 : 5 + 15 + 7 + 2);
char n = delta16 ? 18 + 13 + 2 : 18 + 7 + 2;
if (!up) n++;
ip += asm_rl(BLIT_CODE + ip, ASM_BMI, n);
delta8 = !delta16;
}
if (dy)
if (up)
{
ip += asm_np(BLIT_CODE + ip, up ? ASM_DEY : ASM_INY);
ip += asm_im(BLIT_CODE + ip, ASM_CPY, up ? 0xff : 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, 15);
ip += asm_np(BLIT_CODE + ip, ASM_DEY);
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta8 ? 17 : 15);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x07);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, stride >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, up ? 0x07 : 0x00);
}
else
{
ip += asm_np(BLIT_CODE + ip, ASM_INY);
ip += asm_im(BLIT_CODE + ip, ASM_CPY, 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BNE, delta8 ? 16 : 14);
ip += asm_im(BLIT_CODE + ip, ASM_LDY, 0x00);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, (stride - 1) >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP + 1);
}
if (dx && dy)
if (dx)
{
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_np(BLIT_CODE + ip, ASM_SEC);
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
@ -614,47 +632,52 @@ static inline void buildline(char ly, char lx, int dx, int dy, int stride, bool
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_SBC, dx >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 13 + 4 + 12);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
}
else
{
// We know regdp to be in the accu at this point
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, 5 + 4 + 12);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
}
}
// m < 0
ip += asm_rl(BLIT_CODE + ip, ASM_BPL, delta16 ? 4 + 13 + 13 : 4 + 13 + 7);
}
if (dx)
{
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ASL : ASM_LSR, REG_D0);
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 13);
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 12);
ip += asm_zp(BLIT_CODE + ip, left ? ASM_ROL : ASM_ROR, REG_D0);
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_SP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, left ? 0xf8 : 0x08);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
if (left)
{
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0xf8);
ip += asm_rl(BLIT_CODE + ip, ASM_BCS, 2);
ip += asm_zp(BLIT_CODE + ip, ASM_DEC, REG_SP + 1);
}
else
{
ip += asm_im(BLIT_CODE + ip, ASM_ADC, 0x08);
ip += asm_rl(BLIT_CODE + ip, ASM_BCC, 2);
ip += asm_zp(BLIT_CODE + ip, ASM_INC, REG_SP + 1);
}
}
if (dx && dy)
{
ip += asm_np(BLIT_CODE + ip, ASM_CLC);
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy & 0xff);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP);
if (delta16)
{
ip += asm_zp(BLIT_CODE + ip, ASM_LDA, REG_DP + 1);
ip += asm_im(BLIT_CODE + ip, ASM_ADC, dy >> 8);
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_DP + 1);
}
ip += asm_zp(BLIT_CODE + ip, ASM_STA, REG_SP);
}
// l --

View File

@ -13613,7 +13613,7 @@ bool InterCodeBasicBlock::MoveTrainCrossBlock(void)
FastNumberSet nset(mEntryRequiredTemps.Size());
InterInstruction* ins(mInstructions[i]);
if (ins->mCode == IC_STORE)
if (ins->mCode == IC_STORE && ins->mSrc[0].mFinal)
{
for (int k = 0; k < ins->mNumOperands; k++)
{
@ -22996,7 +22996,7 @@ void InterCodeProcedure::Close(void)
{
GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "test");
CheckFunc = !strcmp(mIdent->mString, "main");
CheckCase = false;
mEntryBlock = mBlocks[0];

View File

@ -37694,7 +37694,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
while (ai < mIns.Size() && !mIns[ai].ChangesAccu())
ai++;
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z))
if (ai < mIns.Size() && !(mIns[ai].mLive & LIVE_CPU_REG_Z) && !mEntryRequiredRegs[CPU_REG_A])
{
if (mIns[ai].mType == ASMIT_LDA && mIns[ai].mMode == ASMIM_IMMEDIATE)
{
@ -51811,7 +51811,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
mInterProc = proc;
mInterProc->mLinkerObject->mNativeProc = this;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "bmmcu_line");
CheckFunc = !strcmp(mInterProc->mIdent->mString, "main");
int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks];

View File

@ -48,7 +48,7 @@ struct Point
};
Point tcorners[8], pcorners[8];
__striped Point tcorners[8], pcorners[8];
void drawCube(void)
{
@ -77,6 +77,45 @@ void hideCube(void)
}
}
void xorCube(void)
{
for(char i=0; i<8; i++)
{
if (!(i & 1))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
if (!(i & 2))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
if (!(i & 4))
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
pcorners[i] = tcorners[i];
}
}
void xor2Cube(void)
{
for(char i=0; i<8; i++)
{
if (!(i & 1))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 1].x, pcorners[i | 1].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 1].x, tcorners[i | 1].y, 0xff, LINOP_XOR);
}
if (!(i & 2))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 2].x, pcorners[i | 2].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 2].x, tcorners[i | 2].y, 0xff, LINOP_XOR);
}
if (!(i & 4))
{
bm_line(&Screen, &cr, pcorners[i].x, pcorners[i].y, pcorners[i | 4].x, pcorners[i | 4].y, 0xff, LINOP_XOR);
bm_line(&Screen, &cr, tcorners[i].x, tcorners[i].y, tcorners[i | 4].x, tcorners[i | 4].y, 0xff, LINOP_XOR);
}
}
for(char i=0; i<8; i++)
pcorners[i] = tcorners[i];
}
#if 1
F12Vector3 corners[8];
@ -112,8 +151,15 @@ int main(void)
tcorners[i].y = lmuldiv16s(vd.v[1], 140, vd.v[2] + 4 * FIX12_ONE) + 100;
}
#if 1
if (k)
xor2Cube();
else
xorCube();
#else
hideCube();
drawCube();
#endif
}