Optimize indexed static variable access

This commit is contained in:
drmortalwombat 2022-02-26 18:10:23 +01:00
parent 2719b1156b
commit 756245694f
5 changed files with 239 additions and 27 deletions

View File

@ -2938,8 +2938,7 @@ __asm fcmp
{
lda accu + 3
eor tmp + 3
and #$80
beq W1
bpl W1
// different sig, check zero case
@ -3006,8 +3005,7 @@ __asm inp_binop_cmp_f32
lda accu + 3
eor $03, x
and #$80
beq W1
bpl W1
// different sig, check zero case

View File

@ -9738,6 +9738,102 @@ bool NativeCodeBasicBlock::FindImmediateStore(int at, int reg, const NativeCodeI
}
bool NativeCodeBasicBlock::CheckGlobalAddressSumYPointer(int reg, int at, int yval)
{
if (!mPatched)
{
mPatched = true;
while (at < mIns.Size())
{
NativeCodeInstruction& ins(mIns[at]);
if (ins.mMode == ASMIM_ZERO_PAGE && (ins.mAddress == reg || ins.mAddress == reg + 1))
return false;
else if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress == reg)
{
if (yval < 0)
return false;
else if (!(ins.mLive & LIVE_MEM))
return true;
}
if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_IMMEDIATE)
yval = ins.mAddress;
else if (ins.mType == ASMIT_INY && yval >= 0)
yval = (yval + 1) & 255;
else if (ins.mType == ASMIT_DEY && yval >= 0)
yval = (yval - 1) & 255;
else if (ins.ChangesYReg())
yval = -1;
at++;
}
if (mTrueJump && !mTrueJump->CheckGlobalAddressSumYPointer(reg, 0, yval))
return false;
if (mFalseJump && !mFalseJump->CheckGlobalAddressSumYPointer(reg, 0, yval))
return false;
}
return true;
}
bool NativeCodeBasicBlock::PatchGlobalAddressSumYPointer(int reg, int at, int yval, LinkerObject* lobj, int address)
{
bool changed = false;
if (!mPatched)
{
mPatched = true;
while (at < mIns.Size())
{
NativeCodeInstruction& ins(mIns[at]);
assert(!(ins.mMode == ASMIM_ZERO_PAGE && (ins.mAddress == reg || ins.mAddress == reg + 1)));
if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress == reg)
{
assert(yval >= 0);
bool done = !(ins.mLive & LIVE_MEM);
ins.mMode = ASMIM_ABSOLUTE_Y;
ins.mLinkerObject = lobj;
ins.mAddress = address + yval;
if (ins.mLive & LIVE_CPU_REG_Y)
mIns.Insert(at + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yval));
mIns.Insert(at, NativeCodeInstruction(ASMIT_LDY, ASMIM_ZERO_PAGE, reg));
at++;
changed = true;
if (done)
return changed;
}
else if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_IMMEDIATE)
yval = ins.mAddress;
else if (ins.mType == ASMIT_INY && yval >= 0)
yval = (yval + 1) & 255;
else if (ins.mType == ASMIT_DEY && yval >= 0)
yval = (yval - 1) & 255;
else if (ins.ChangesYReg())
yval = -1;
at++;
}
if (mTrueJump && mTrueJump->PatchGlobalAddressSumYPointer(reg, 0, yval, lobj, address))
changed = true;
if (mFalseJump && mFalseJump->PatchGlobalAddressSumYPointer(reg, 0, yval, lobj, address))
changed = true;
}
return changed;
}
bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction*& ains, const NativeCodeInstruction*& iins, uint32& flags, int& addr)
{
flags = 0;
@ -10849,6 +10945,19 @@ bool NativeCodeBasicBlock::MoveLoadXUp(int at)
return true;
}
else if (mIns[i].mType == ASMIT_INC && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[at].mAddress)
{
mIns[i].mType = ASMIT_LDX;
mIns[at].mType = ASMIT_INX;
mIns[at].mMode = ASMIM_IMPLIED;
while (i < at)
{
mIns[i].mLive |= LIVE_CPU_REG_X;
i++;
}
return true;
}
if (mIns[i].RequiresXReg() || mIns[i].ChangesXReg() || (mIns[i].mLive & LIVE_CPU_REG_X) || mIns[i].UsesZeroPage(mIns[at].mAddress))
return false;
@ -10914,6 +11023,19 @@ bool NativeCodeBasicBlock::MoveLoadYUp(int at)
return true;
}
else if (mIns[i].mType == ASMIT_INC && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[at].mAddress)
{
mIns[i].mType = ASMIT_LDY;
mIns[at].mType = ASMIT_INY;
mIns[at].mMode = ASMIM_IMPLIED;
while (i < at)
{
mIns[i].mLive |= LIVE_CPU_REG_Y;
i++;
}
return true;
}
if (mIns[i].RequiresYReg() || mIns[i].ChangesYReg() || (mIns[i].mLive & LIVE_CPU_REG_Y) || mIns[i].UsesZeroPage(mIns[at].mAddress))
return false;
@ -13334,7 +13456,7 @@ bool NativeCodeBasicBlock::RemoveNops(void)
return changed;
}
bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass)
bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass)
{
if (!mVisited)
{
@ -16077,6 +16199,32 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass)
int a = mIns[i + 1].mAddress; mIns[i + 1].mAddress = mIns[i + 4].mAddress; mIns[i + 4].mAddress = a;
progress = true;
}
#if 1
if (pass == 0 &&
mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE_ADDRESS && (mIns[i + 1].mFlags & NCIF_LOWER) &&
mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].mMode == ASMIM_IMMEDIATE_ADDRESS && (mIns[i + 3].mFlags & NCIF_UPPER) && (mIns[i + 3].mLinkerObject == mIns[i + 1].mLinkerObject) &&
mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_IMMEDIATE &&
mIns[i + 5].mType == ASMIT_STA && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 5].mAddress == mIns[i + 2].mAddress + 1 &&
!(mIns[i + 5].mLive & LIVE_CPU_REG_A))
{
proc->ResetPatched();
if (CheckGlobalAddressSumYPointer(mIns[i + 2].mAddress, i + 6, -1))
{
assert(mIns[i + 3].mAddress == mIns[i + 1].mAddress);
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED;
mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED;
mIns[i + 5].mType = ASMIT_NOP; mIns[i + 5].mMode = ASMIM_IMPLIED;
proc->ResetPatched();
progress = PatchGlobalAddressSumYPointer(mIns[i + 2].mAddress, i + 6, -1, mIns[i + 3].mLinkerObject, mIns[i + 3].mAddress);
}
}
#endif
}
if (i + 6 < mIns.Size())
@ -16280,9 +16428,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass)
} while (progress);
if (this->mTrueJump && this->mTrueJump->PeepHoleOptimizer(pass))
if (this->mTrueJump && this->mTrueJump->PeepHoleOptimizer(proc, pass))
changed = true;
if (this->mFalseJump && this->mFalseJump->PeepHoleOptimizer(pass))
if (this->mFalseJump && this->mFalseJump->PeepHoleOptimizer(proc, pass))
changed = true;
assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV);
@ -17290,7 +17438,7 @@ void NativeCodeProcedure::Optimize(void)
#endif
#if 1
ResetVisited();
if (mEntryBlock->PeepHoleOptimizer(step))
if (mEntryBlock->PeepHoleOptimizer(this, step))
changed = true;
#endif
#if 1
@ -17509,11 +17657,15 @@ void NativeCodeProcedure::ResetEntryBlocks(void)
mBlocks[i]->mEntryBlocks.SetSize(0);
}
void NativeCodeProcedure::ResetPatched(void)
{
for (int i = 0; i < mBlocks.Size(); i++)
mBlocks[i]->mPatched = false;
}
void NativeCodeProcedure::ResetVisited(void)
{
int i;
for (i = 0; i < mBlocks.Size(); i++)
for (int i = 0; i < mBlocks.Size(); i++)
{
// assert(mBlocks[i]->mIns.Size() > 0 || (mBlocks[i]->mTrueJump != mBlocks[i] && mBlocks[i]->mFalseJump != mBlocks[i]));

View File

@ -124,7 +124,7 @@ public:
GrowingArray<NativeCodeBasicBlock*> mEntryBlocks;
int mOffset, mSize, mPlace, mNumEntries, mNumEntered, mFrameOffset;
bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mNoFrame, mVisited, mLoopHead, mVisiting, mLocked;
bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mNoFrame, mVisited, mLoopHead, mVisiting, mLocked, mPatched;
NativeCodeBasicBlock* mLoopHeadBlock;
NativeRegisterDataSet mDataSet, mNDataSet;
@ -145,7 +145,7 @@ public:
void Close(NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock* falseJump, AsmInsType branch);
bool RemoveNops(void);
bool PeepHoleOptimizer(int pass);
bool PeepHoleOptimizer(NativeCodeProcedure* proc, int pass);
void BlockSizeReduction(void);
bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc);
@ -270,6 +270,9 @@ public:
void GlobalRegisterYMap(int reg);
bool LocalRegisterXYMap(void);
bool ReduceLocalYPressure(void);
bool CheckGlobalAddressSumYPointer(int reg, int at, int yval);
bool PatchGlobalAddressSumYPointer(int reg, int at, int yval, LinkerObject * lobj, int address);
};
class NativeCodeProcedure
@ -306,6 +309,7 @@ class NativeCodeProcedure
void BuildDataFlowSets(void);
void ResetEntryBlocks(void);
void ResetVisited(void);
void ResetPatched(void);
};

View File

@ -24,8 +24,8 @@ char tilemap[128 * 5] = {
#embed "../../../assets/uridium1 - Map (128x5).bin"
};
char spriteset[2048] = {
#embed 2048 0 "../../../assets/uridium1 - Sprites.bin"
char spriteset[4096] = {
#embed 4096 0 "../../../assets/uridium1 - Sprites.bin"
};
char xtileset[16][64];
@ -325,6 +325,66 @@ void tiles_draw(unsigned x)
vic.ctrl2 = VIC_CTRL2_MCM + xs;
}
// State of one enemy; enemies_move() updates the five slots once per call and
// shows slot i with sprite 2 + i.
struct Enemy
{
// Horizontal position; enemies_move() subtracts spx from it to get the
// position relative to the current scroll offset
int px;
// Vertical position; 50 is added when it is passed to the sprite routines
byte py;
// Signed step added to px on every enemies_move() call
sbyte dx;
// Countdown decremented on every enemies_move() call; 0 marks the slot as
// inactive, a new wave starts with 100
byte n;
} enemies[5];

// Scroll position handed to tiles_draw(); main advances it by vpx >> 1 and
// wraps it with & 4095
int spx = 40;
// Signed horizontal speed; its sign also selects the side new enemies spawn on
int vpx = 16;
// NOTE(review): presumably the horizontal acceleration derived from the
// joystick, only its sign test is visible here - confirm
int ax = 0;
// Vertical offset of the player sprite; main passes 50 + spy to spr_move()
char spy = 100;
// NOTE(review): no use visible in this excerpt, presumably a fire delay
// counter - confirm
char fdelay = 0;
void enemies_move(void)
{
bool elive = false;
for(char i=0; i<5; i++)
{
if (enemies[i].n)
{
enemies[i].n--;
enemies[i].px += enemies[i].dx;
int rx = enemies[i].px - spx;
if (enemies[i].dx < 0)
{
if (rx < -24)
enemies[i].n = 0;
}
else
{
if (rx > 320)
enemies[i].n = 0;
}
spr_move(2 + i, rx + 24, enemies[i].py + 50);
elive = true;
}
}
if (!elive)
{
for(char i=0; i<5; i++)
{
sbyte v = 4 + (rand() & 1);
enemies[i].py = 20 + 30 * i;
enemies[i].dx = vpx < 0 ? v : -v;
enemies[i].px = (vpx < 0 ? spx - 56 : spx + 320) + (rand() & 31);
enemies[i].n = 100;
int rx = enemies[i].px - spx;
spr_set(2 + i, true, rx + 24, enemies[i].py + 50, 96, VCOL_YELLOW, true, false, false);
}
}
}
int main(void)
{
cia_init();
@ -369,15 +429,10 @@ int main(void)
lastShot->ty = 6;
spr_set(0, true, 160, 100, 64, VCOL_BLUE, true, false, false);
spr_set(1, true, 160, 100, 64 + 16, VCOL_MED_GREY, true, false, false);
spr_set(7, true, 160, 100, 64 + 16, VCOL_MED_GREY, true, false, false);
vic.spr_priority = 2;
int spx = 40;
int vpx = 16;
int ax = 0;
char spy = 100;
char fdelay = 0;
vpx = 2;
for(;;)
{
joy_poll(0);
@ -401,12 +456,12 @@ int main(void)
if (vpx >= 32)
{
spr_image(0, 64);
spr_image(1, 64 + 16);
spr_image(7, 64 + 16);
}
else
{
spr_image(0, 76 + (vpx >> 3));
spr_image(1, 76 + (vpx >> 3) + 16);
spr_image(7, 76 + (vpx >> 3) + 16);
}
}
else if (ax < 0)
@ -419,12 +474,12 @@ int main(void)
if (vpx <= -32)
{
spr_image(0, 72);
spr_image(1, 72 + 16);
spr_image(7, 72 + 16);
}
else
{
spr_image(0, 68 - (vpx >> 3));
spr_image(1, 68 - (vpx >> 3) + 16);
spr_image(7, 68 - (vpx >> 3) + 16);
}
}
@ -437,16 +492,19 @@ int main(void)
}
spr_move(0, 172 - 4 * vpx, 50 + spy);
spr_move(1, 180 - 4 * vpx, 58 + spy);
spr_move(7, 180 - 4 * vpx, 58 + spy);
vic.color_border++;
vic_waitLine(82);
vic.color_border++;
tiles_draw(spx);
vic.color_border--;
enemies_move();
vic.color_border--;
spx += vpx >> 1;
spx &= 4095;
}

Binary file not shown.