Optimize indexed static variable access

This commit is contained in:
drmortalwombat 2022-02-26 18:10:23 +01:00
parent 2719b1156b
commit 756245694f
5 changed files with 239 additions and 27 deletions

View File

@ -2938,8 +2938,7 @@ __asm fcmp
{ {
lda accu + 3 lda accu + 3
eor tmp + 3 eor tmp + 3
and #$80 bpl W1
beq W1
// different sig, check zero case // different sig, check zero case
@ -3006,8 +3005,7 @@ __asm inp_binop_cmp_f32
lda accu + 3 lda accu + 3
eor $03, x eor $03, x
and #$80 bpl W1
beq W1
// different sig, check zero case // different sig, check zero case

View File

@ -9738,6 +9738,102 @@ bool NativeCodeBasicBlock::FindImmediateStore(int at, int reg, const NativeCodeI
} }
bool NativeCodeBasicBlock::CheckGlobalAddressSumYPointer(int reg, int at, int yval)
{
if (!mPatched)
{
mPatched = true;
while (at < mIns.Size())
{
NativeCodeInstruction& ins(mIns[at]);
if (ins.mMode == ASMIM_ZERO_PAGE && (ins.mAddress == reg || ins.mAddress == reg + 1))
return false;
else if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress == reg)
{
if (yval < 0)
return false;
else if (!(ins.mLive & LIVE_MEM))
return true;
}
if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_IMMEDIATE)
yval = ins.mAddress;
else if (ins.mType == ASMIT_INY && yval >= 0)
yval = (yval + 1) & 255;
else if (ins.mType == ASMIT_DEY && yval >= 0)
yval = (yval - 1) & 255;
else if (ins.ChangesYReg())
yval = -1;
at++;
}
if (mTrueJump && !mTrueJump->CheckGlobalAddressSumYPointer(reg, 0, yval))
return false;
if (mFalseJump && !mFalseJump->CheckGlobalAddressSumYPointer(reg, 0, yval))
return false;
}
return true;
}
// Rewrites indirect-Y accesses through the zero page pointer reg into
// absolute-Y accesses on (lobj, address), scanning this block from
// instruction index "at" and then, recursively, both successor blocks.
//
//   reg      zero page address of the pointer that is being replaced
//   at       first instruction index to inspect in this block
//   yval     known content of the Y register on entry, or -1 when unknown
//   lobj     linker object stored into each rewritten instruction
//   address  base address; the known Y value is added to it per access
//
// Returns true when at least one instruction was rewritten.  The asserts
// restate the conditions CheckGlobalAddressSumYPointer tests for, so the
// caller is expected to run that check (with freshly reset mPatched flags)
// before calling this function.
bool NativeCodeBasicBlock::PatchGlobalAddressSumYPointer(int reg, int at, int yval, LinkerObject* lobj, int address)
{
bool changed = false;
// mPatched doubles as the visited flag of this traversal
if (!mPatched)
{
mPatched = true;
while (at < mIns.Size())
{
NativeCodeInstruction& ins(mIns[at]);
assert(!(ins.mMode == ASMIM_ZERO_PAGE && (ins.mAddress == reg || ins.mAddress == reg + 1)));
if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress == reg)
{
assert(yval >= 0);
// Read the liveness before the array is modified: LIVE_MEM clear ends the scan after this access
bool done = !(ins.mLive & LIVE_MEM);
// The constant Y offset moves into the absolute address
ins.mMode = ASMIM_ABSOLUTE_Y;
ins.mLinkerObject = lobj;
ins.mAddress = address + yval;
// Y is still needed afterwards, so reload the constant it held behind the access
// NOTE(review): LDY presumably alters the N/Z flags set by the patched instruction - confirm they are never live here
if (ins.mLive & LIVE_CPU_REG_Y)
mIns.Insert(at + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yval));
// Load Y from the zero page location reg directly in front of the access
// NOTE(review): "ins" refers into mIns and is not used after the inserts - keep it that way if this code is changed
mIns.Insert(at, NativeCodeInstruction(ASMIT_LDY, ASMIM_ZERO_PAGE, reg));
// Skip the LDY just inserted in front; the at++ at the loop end then skips the patched instruction
at++;
changed = true;
if (done)
return changed;
}
// Keep track of the constant held in Y, -1 once it becomes unknown
else if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_IMMEDIATE)
yval = ins.mAddress;
else if (ins.mType == ASMIT_INY && yval >= 0)
yval = (yval + 1) & 255;
else if (ins.mType == ASMIT_DEY && yval >= 0)
yval = (yval - 1) & 255;
else if (ins.ChangesYReg())
yval = -1;
at++;
}
// Continue in both successors with the Y state at the end of this block
if (mTrueJump && mTrueJump->PatchGlobalAddressSumYPointer(reg, 0, yval, lobj, address))
changed = true;
if (mFalseJump && mFalseJump->PatchGlobalAddressSumYPointer(reg, 0, yval, lobj, address))
changed = true;
}
return changed;
}
bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction*& ains, const NativeCodeInstruction*& iins, uint32& flags, int& addr) bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction*& ains, const NativeCodeInstruction*& iins, uint32& flags, int& addr)
{ {
flags = 0; flags = 0;
@ -10849,6 +10945,19 @@ bool NativeCodeBasicBlock::MoveLoadXUp(int at)
return true; return true;
} }
else if (mIns[i].mType == ASMIT_INC && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[at].mAddress)
{
mIns[i].mType = ASMIT_LDX;
mIns[at].mType = ASMIT_INX;
mIns[at].mMode = ASMIM_IMPLIED;
while (i < at)
{
mIns[i].mLive |= LIVE_CPU_REG_X;
i++;
}
return true;
}
if (mIns[i].RequiresXReg() || mIns[i].ChangesXReg() || (mIns[i].mLive & LIVE_CPU_REG_X) || mIns[i].UsesZeroPage(mIns[at].mAddress)) if (mIns[i].RequiresXReg() || mIns[i].ChangesXReg() || (mIns[i].mLive & LIVE_CPU_REG_X) || mIns[i].UsesZeroPage(mIns[at].mAddress))
return false; return false;
@ -10914,6 +11023,19 @@ bool NativeCodeBasicBlock::MoveLoadYUp(int at)
return true; return true;
} }
else if (mIns[i].mType == ASMIT_INC && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[at].mAddress)
{
mIns[i].mType = ASMIT_LDY;
mIns[at].mType = ASMIT_INY;
mIns[at].mMode = ASMIM_IMPLIED;
while (i < at)
{
mIns[i].mLive |= LIVE_CPU_REG_Y;
i++;
}
return true;
}
if (mIns[i].RequiresYReg() || mIns[i].ChangesYReg() || (mIns[i].mLive & LIVE_CPU_REG_Y) || mIns[i].UsesZeroPage(mIns[at].mAddress)) if (mIns[i].RequiresYReg() || mIns[i].ChangesYReg() || (mIns[i].mLive & LIVE_CPU_REG_Y) || mIns[i].UsesZeroPage(mIns[at].mAddress))
return false; return false;
@ -13334,7 +13456,7 @@ bool NativeCodeBasicBlock::RemoveNops(void)
return changed; return changed;
} }
bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass)
{ {
if (!mVisited) if (!mVisited)
{ {
@ -16077,6 +16199,32 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass)
int a = mIns[i + 1].mAddress; mIns[i + 1].mAddress = mIns[i + 4].mAddress; mIns[i + 4].mAddress = a; int a = mIns[i + 1].mAddress; mIns[i + 1].mAddress = mIns[i + 4].mAddress; mIns[i + 4].mAddress = a;
progress = true; progress = true;
} }
#if 1
if (pass == 0 &&
mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE_ADDRESS && (mIns[i + 1].mFlags & NCIF_LOWER) &&
mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].mMode == ASMIM_IMMEDIATE_ADDRESS && (mIns[i + 3].mFlags & NCIF_UPPER) && (mIns[i + 3].mLinkerObject == mIns[i + 1].mLinkerObject) &&
mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_IMMEDIATE &&
mIns[i + 5].mType == ASMIT_STA && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 5].mAddress == mIns[i + 2].mAddress + 1 &&
!(mIns[i + 5].mLive & LIVE_CPU_REG_A))
{
proc->ResetPatched();
if (CheckGlobalAddressSumYPointer(mIns[i + 2].mAddress, i + 6, -1))
{
assert(mIns[i + 3].mAddress == mIns[i + 1].mAddress);
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED;
mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED;
mIns[i + 5].mType = ASMIT_NOP; mIns[i + 5].mMode = ASMIM_IMPLIED;
proc->ResetPatched();
progress = PatchGlobalAddressSumYPointer(mIns[i + 2].mAddress, i + 6, -1, mIns[i + 3].mLinkerObject, mIns[i + 3].mAddress);
}
}
#endif
} }
if (i + 6 < mIns.Size()) if (i + 6 < mIns.Size())
@ -16280,9 +16428,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass)
} while (progress); } while (progress);
if (this->mTrueJump && this->mTrueJump->PeepHoleOptimizer(pass)) if (this->mTrueJump && this->mTrueJump->PeepHoleOptimizer(proc, pass))
changed = true; changed = true;
if (this->mFalseJump && this->mFalseJump->PeepHoleOptimizer(pass)) if (this->mFalseJump && this->mFalseJump->PeepHoleOptimizer(proc, pass))
changed = true; changed = true;
assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV);
@ -17290,7 +17438,7 @@ void NativeCodeProcedure::Optimize(void)
#endif #endif
#if 1 #if 1
ResetVisited(); ResetVisited();
if (mEntryBlock->PeepHoleOptimizer(step)) if (mEntryBlock->PeepHoleOptimizer(this, step))
changed = true; changed = true;
#endif #endif
#if 1 #if 1
@ -17509,11 +17657,15 @@ void NativeCodeProcedure::ResetEntryBlocks(void)
mBlocks[i]->mEntryBlocks.SetSize(0); mBlocks[i]->mEntryBlocks.SetSize(0);
} }
// Clears the mPatched flag of every basic block of this procedure, so that a
// new Check/PatchGlobalAddressSumYPointer traversal starts with no block
// marked as visited.
void NativeCodeProcedure::ResetPatched(void)
{
	int index = 0;
	while (index < mBlocks.Size())
	{
		mBlocks[index]->mPatched = false;
		++index;
	}
}
void NativeCodeProcedure::ResetVisited(void) void NativeCodeProcedure::ResetVisited(void)
{ {
int i; for (int i = 0; i < mBlocks.Size(); i++)
for (i = 0; i < mBlocks.Size(); i++)
{ {
// assert(mBlocks[i]->mIns.Size() > 0 || (mBlocks[i]->mTrueJump != mBlocks[i] && mBlocks[i]->mFalseJump != mBlocks[i])); // assert(mBlocks[i]->mIns.Size() > 0 || (mBlocks[i]->mTrueJump != mBlocks[i] && mBlocks[i]->mFalseJump != mBlocks[i]));

View File

@ -124,7 +124,7 @@ public:
GrowingArray<NativeCodeBasicBlock*> mEntryBlocks; GrowingArray<NativeCodeBasicBlock*> mEntryBlocks;
int mOffset, mSize, mPlace, mNumEntries, mNumEntered, mFrameOffset; int mOffset, mSize, mPlace, mNumEntries, mNumEntered, mFrameOffset;
bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mNoFrame, mVisited, mLoopHead, mVisiting, mLocked; bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mNoFrame, mVisited, mLoopHead, mVisiting, mLocked, mPatched;
NativeCodeBasicBlock* mLoopHeadBlock; NativeCodeBasicBlock* mLoopHeadBlock;
NativeRegisterDataSet mDataSet, mNDataSet; NativeRegisterDataSet mDataSet, mNDataSet;
@ -145,7 +145,7 @@ public:
void Close(NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock* falseJump, AsmInsType branch); void Close(NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock* falseJump, AsmInsType branch);
bool RemoveNops(void); bool RemoveNops(void);
bool PeepHoleOptimizer(int pass); bool PeepHoleOptimizer(NativeCodeProcedure* proc, int pass);
void BlockSizeReduction(void); void BlockSizeReduction(void);
bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc); bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc);
@ -270,6 +270,9 @@ public:
void GlobalRegisterYMap(int reg); void GlobalRegisterYMap(int reg);
bool LocalRegisterXYMap(void); bool LocalRegisterXYMap(void);
bool ReduceLocalYPressure(void); bool ReduceLocalYPressure(void);
bool CheckGlobalAddressSumYPointer(int reg, int at, int yval);
bool PatchGlobalAddressSumYPointer(int reg, int at, int yval, LinkerObject * lobj, int address);
}; };
class NativeCodeProcedure class NativeCodeProcedure
@ -306,6 +309,7 @@ class NativeCodeProcedure
void BuildDataFlowSets(void); void BuildDataFlowSets(void);
void ResetEntryBlocks(void); void ResetEntryBlocks(void);
void ResetVisited(void); void ResetVisited(void);
void ResetPatched(void);
}; };

View File

@ -24,8 +24,8 @@ char tilemap[128 * 5] = {
#embed "../../../assets/uridium1 - Map (128x5).bin" #embed "../../../assets/uridium1 - Map (128x5).bin"
}; };
char spriteset[2048] = { char spriteset[4096] = {
#embed 2048 0 "../../../assets/uridium1 - Sprites.bin" #embed 4096 0 "../../../assets/uridium1 - Sprites.bin"
}; };
char xtileset[16][64]; char xtileset[16][64];
@ -325,6 +325,66 @@ void tiles_draw(unsigned x)
vic.ctrl2 = VIC_CTRL2_MCM + xs; vic.ctrl2 = VIC_CTRL2_MCM + xs;
} }
// One enemy slot; enemies_move() updates and respawns the five entries of enemies[].
struct Enemy
{
int px;		// x position in the same coordinate space as spx (drawn at px - spx + 24)
byte py;	// y position, the sprite is placed at py + 50
sbyte dx;	// signed x step added to px on every enemies_move() call
byte n;		// countdown decremented on every update, zero marks the slot as inactive
} enemies[5];
// Game state shared between main() and enemies_move()

// Horizontal scroll position: passed to tiles_draw(), advanced by vpx >> 1 and wrapped with & 4095 in main()
int spx = 40;
// Horizontal speed; its sign selects the spawn side and direction of new enemies in enemies_move()
int vpx = 16;
// NOTE(review): presumably the horizontal acceleration derived from the joystick - confirm in main()
int ax = 0;
// Vertical offset of the player sprites, main() places them at 50 + spy and 58 + spy
char spy = 100;
// NOTE(review): use not visible here, presumably a delay counter between shots - confirm in main()
char fdelay = 0;
void enemies_move(void)
{
bool elive = false;
for(char i=0; i<5; i++)
{
if (enemies[i].n)
{
enemies[i].n--;
enemies[i].px += enemies[i].dx;
int rx = enemies[i].px - spx;
if (enemies[i].dx < 0)
{
if (rx < -24)
enemies[i].n = 0;
}
else
{
if (rx > 320)
enemies[i].n = 0;
}
spr_move(2 + i, rx + 24, enemies[i].py + 50);
elive = true;
}
}
if (!elive)
{
for(char i=0; i<5; i++)
{
sbyte v = 4 + (rand() & 1);
enemies[i].py = 20 + 30 * i;
enemies[i].dx = vpx < 0 ? v : -v;
enemies[i].px = (vpx < 0 ? spx - 56 : spx + 320) + (rand() & 31);
enemies[i].n = 100;
int rx = enemies[i].px - spx;
spr_set(2 + i, true, rx + 24, enemies[i].py + 50, 96, VCOL_YELLOW, true, false, false);
}
}
}
int main(void) int main(void)
{ {
cia_init(); cia_init();
@ -369,15 +429,10 @@ int main(void)
lastShot->ty = 6; lastShot->ty = 6;
spr_set(0, true, 160, 100, 64, VCOL_BLUE, true, false, false); spr_set(0, true, 160, 100, 64, VCOL_BLUE, true, false, false);
spr_set(1, true, 160, 100, 64 + 16, VCOL_MED_GREY, true, false, false); spr_set(7, true, 160, 100, 64 + 16, VCOL_MED_GREY, true, false, false);
vic.spr_priority = 2; vic.spr_priority = 2;
int spx = 40; vpx = 2;
int vpx = 16;
int ax = 0;
char spy = 100;
char fdelay = 0;
for(;;) for(;;)
{ {
joy_poll(0); joy_poll(0);
@ -401,12 +456,12 @@ int main(void)
if (vpx >= 32) if (vpx >= 32)
{ {
spr_image(0, 64); spr_image(0, 64);
spr_image(1, 64 + 16); spr_image(7, 64 + 16);
} }
else else
{ {
spr_image(0, 76 + (vpx >> 3)); spr_image(0, 76 + (vpx >> 3));
spr_image(1, 76 + (vpx >> 3) + 16); spr_image(7, 76 + (vpx >> 3) + 16);
} }
} }
else if (ax < 0) else if (ax < 0)
@ -419,12 +474,12 @@ int main(void)
if (vpx <= -32) if (vpx <= -32)
{ {
spr_image(0, 72); spr_image(0, 72);
spr_image(1, 72 + 16); spr_image(7, 72 + 16);
} }
else else
{ {
spr_image(0, 68 - (vpx >> 3)); spr_image(0, 68 - (vpx >> 3));
spr_image(1, 68 - (vpx >> 3) + 16); spr_image(7, 68 - (vpx >> 3) + 16);
} }
} }
@ -437,16 +492,19 @@ int main(void)
} }
spr_move(0, 172 - 4 * vpx, 50 + spy); spr_move(0, 172 - 4 * vpx, 50 + spy);
spr_move(1, 180 - 4 * vpx, 58 + spy); spr_move(7, 180 - 4 * vpx, 58 + spy);
vic.color_border++; vic.color_border++;
vic_waitLine(82); vic_waitLine(82);
vic.color_border++; vic.color_border++;
tiles_draw(spx); tiles_draw(spx);
vic.color_border--; vic.color_border--;
enemies_move();
vic.color_border--; vic.color_border--;
spx += vpx >> 1; spx += vpx >> 1;
spx &= 4095; spx &= 4095;
} }

Binary file not shown.