Loop index optimizations

This commit is contained in:
drmortalwombat 2022-10-03 10:50:24 +02:00
parent 27a227501a
commit 3e59f47748
5 changed files with 824 additions and 25 deletions

View File

@ -89,6 +89,85 @@ e1:
}
// Raster IRQ entry point that rirq_init_memmap installs at $fffe.
// Saves A/X/Y, writes $35 to $01 while the handlers run, calls the
// handlers of the due raster IRQ entries, programs the next raster
// compare value into $d012 and finally restores $01 from PLAShadow
// and the registers before returning with rti.
// NOTE(review): $01 is presumably the C64 processor port and
// $d011/$d012/$d019 the VIC-II control/raster/IRQ registers - confirm.
__asm irq2
{
// preserve A, X and Y on the stack
pha
txa
pha
tya
pha
// set $01 to $35 for the duration of the handler; it is restored
// from PLAShadow at ex
lda #$35
sta $01
// NOTE(review): presumably acknowledges the pending VIC interrupt - confirm
asl $d019
ldx nextIRQ
l1:
// A = table entry of slot X; $ff marks the end of the list
lda rasterIRQNext, x
cmp #$ff
beq e1
ldy rasterIRQIndex, x
// X now carries the table entry while the handler is called
tax
// patch the operand of the jsr at ji with the handler address of index Y
lda rasterIRQLow, y
sta ji + 1
lda rasterIRQHigh, y
sta ji + 2
ji:
// self-modified call, target address filled in above
jsr $0000
// advance to the next slot and fetch its entry
inc nextIRQ
ldx nextIRQ
lda rasterIRQNext, x
cmp #$ff
beq e2
// carry is cleared at this point
// (A is below $ff after the cmp), so the sbc #2 below subtracts 3
// Y = entry - 1, the value written to $d012 if we do not loop
tay
dey
sbc #2
// if entry - 3 is already below the current $d012 value, handle
// that entry right away (X still holds nextIRQ)
cmp $d012
bcc l1
sty $d012
ex:
// common exit: restore $01, then Y, X and A, and return from interrupt
lda PLAShadow
sta $01
pla
tay
pla
tax
pla
rti
e2:
// end of list reached after running a handler; A is $ff here
ldx npos
stx tpos
inc rirq_count
// bit 7 of $d011 set -> restart the list at e1
bit $d011
bmi e1
// A still holds $ff from the end-of-list compare
sta $d012
jmp ex
e1:
// restart at slot 0 and write (first entry - 1) to $d012
ldx #0
stx nextIRQ
ldy rasterIRQNext
dey
sty $d012
jmp ex
}
__asm irq1
{
lda $d019
@ -284,7 +363,7 @@ void rirq_clear(byte n)
rasterIRQRows[n] = 255;
}
void rirq_init(bool kernalIRQ)
void rirq_init_kernal(void)
{
for(byte i=0; i<NUM_IRQS; i++)
{
@ -295,24 +374,64 @@ void rirq_init(bool kernalIRQ)
__asm
{
sei
#if 0
// disable CIA interrupts
lda #$7f
sta $dc0d
sta $dd0d
#endif
}
if (kernalIRQ)
*(void **)0x0314 = irq1;
else
*(void **)0xfffe = irq0;
*(void **)0x0314 = irq1;
vic.intr_enable = 1;
vic.ctrl1 &= 0x7f;
vic.raster = 255;
}
// Initialize the raster IRQ tables and install irq0 in the hardware
// IRQ vector at $fffe. Interrupts are disabled with sei and are not
// re-enabled by this function.
void rirq_init_io(void)
{
	// mark every slot as unused (row 255) with an identity index mapping
	byte slot = 0;
	while (slot < NUM_IRQS)
	{
		rasterIRQRows[slot] = 255;
		rasterIRQIndex[slot] = slot;
		slot++;
	}

	__asm
	{
		sei
	}

	// hardware IRQ vector
	*(void **)0xfffe = irq0;

	// NOTE(review): presumably enables the VIC raster interrupt and
	// sets the compare line to 255 with bit 8 cleared - confirm
	vic.intr_enable = 1;
	vic.ctrl1 &= 0x7f;
	vic.raster = 255;
}
// Initialize the raster IRQ tables and install irq2 in the hardware
// IRQ vector at $fffe. Interrupts are disabled with sei and are not
// re-enabled by this function.
void rirq_init_memmap(void)
{
	// mark every slot as unused (row 255) with an identity index mapping
	byte slot = 0;
	while (slot < NUM_IRQS)
	{
		rasterIRQRows[slot] = 255;
		rasterIRQIndex[slot] = slot;
		slot++;
	}

	__asm
	{
		sei
	}

	// hardware IRQ vector, served by the variant that switches $01
	*(void **)0xfffe = irq2;

	// NOTE(review): presumably enables the VIC raster interrupt and
	// sets the compare line to 255 with bit 8 cleared - confirm
	vic.intr_enable = 1;
	vic.ctrl1 &= 0x7f;
	vic.raster = 255;
}
// Compatibility wrapper: selects the kernal vector variant when
// kernalIRQ is true and the hardware vector variant otherwise.
void rirq_init(bool kernalIRQ)
{
	if (!kernalIRQ)
	{
		rirq_init_io();
		return;
	}

	rirq_init_kernal();
}
void rirq_wait(void)

View File

@ -124,7 +124,13 @@ inline void rirq_move(byte n, byte row);
// Initialize the raster IRQ system with either the kernal IRQ vector
// or the hardware IRQ vector if the kernal ROM is turned off (which is
// the less resource hungry option)
void rirq_init(bool kernalIRQ);
inline void rirq_init(bool kernalIRQ);
void rirq_init_kernal(void);
void rirq_init_io(void);
void rirq_init_memmap(void);
// Start raster IRQ
void rirq_start(void);

View File

@ -5756,7 +5756,15 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray
else
vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
break;
#if 1
case IA_MODU:
vr.LimitMin(0);
if (ins->mSrc[0].mTemp < 0)
vr.LimitMax(ins->mSrc[0].mIntConst - 1);
else if (ins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND)
vr.LimitMax(ins->mSrc[0].mRange.mMaxValue - 1);
break;
#endif
default:
vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
}
@ -7200,6 +7208,20 @@ bool InterCodeBasicBlock::SimplifyPointerOffsets(void)
return true;
}
// Check whether value is representable in the signed integer type t.
// Returns false for every type other than IT_INT8, IT_INT16 and IT_INT32.
static bool IsValidSignedIntRange(InterType t, int64 value)
{
	switch (t)
	{
	case IT_INT8:
		return value >= -128 && value <= 127;
	case IT_INT16:
		return value >= -32768 && value <= 32767;
	case IT_INT32:
		// value is 64 bit wide, so e.g. the difference of two 32 bit
		// constants may leave the 32 bit range and has to be rejected
		return value >= -2147483648LL && value <= 2147483647LL;
	default:
		return false;
	}
}
bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArray& tvalue, int& spareTemps)
{
@ -7353,6 +7375,40 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra
}
} break;
case IC_RELATIONAL_OPERATOR:
if (ins->mOperator == IA_CMPLS || ins->mOperator == IA_CMPLES || ins->mOperator == IA_CMPGS || ins->mOperator == IA_CMPGES)
{
if (ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp >= 0 && ltvalue[ins->mSrc[1].mTemp])
{
InterInstruction* pins = ltvalue[ins->mSrc[1].mTemp];
if (pins->mCode == IC_BINARY_OPERATOR && pins->mOperator == IA_ADD)
{
if (pins->mSrc[0].mTemp < 0)
{
if (IsValidSignedIntRange(ins->mSrc[0].mType, ins->mSrc[0].mIntConst - pins->mSrc[0].mIntConst))
{
ins->mSrc[1].Forward(pins->mSrc[1]);
pins->mSrc[1].mFinal = false;
ins->mSrc[0].mIntConst -= pins->mSrc[0].mIntConst;
changed = true;
}
}
else if (pins->mSrc[1].mTemp < 0)
{
if (IsValidSignedIntRange(ins->mSrc[0].mType, ins->mSrc[0].mIntConst - pins->mSrc[1].mIntConst))
{
ins->mSrc[1].Forward(pins->mSrc[0]);
pins->mSrc[0].mFinal = false;
ins->mSrc[0].mIntConst -= pins->mSrc[1].mIntConst;
changed = true;
}
}
}
}
}
break;
case IC_LEA:
if (ins->mSrc[1].mMemory == IM_INDIRECT && ins->mSrc[1].mTemp >= 0 && tvalue[ins->mSrc[1].mTemp])
{
@ -8271,6 +8327,16 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray&
j++;
}
mLoadStoreInstructions.SetSize(k);
if (nins)
{
// Check self destruction of source operand
int l = 0;
while (l < nins->mNumOperands && t != nins->mSrc[l].mTemp)
l++;
if (l != nins->mNumOperands)
nins = nullptr;
}
}
if (nins)
@ -10822,6 +10888,15 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
else if (cins->mCode == IC_BINARY_OPERATOR && cins->mOperator == IA_ADD && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0)
toffset += cins->mSrc[0].mIntConst;
else
break;
}
else
{
int k = 0;
while (k < cins->mNumOperands && cins->mSrc[k].mTemp != dt)
k++;
if (k != cins->mNumOperands)
break;
}
@ -10973,6 +11048,8 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mVisited = true;
CheckFinalLocal();
if (mTrueJump) mTrueJump->CheckFinalLocal();
if (mFalseJump) mFalseJump->CheckFinalLocal();
// Remove none instructions
@ -11751,6 +11828,29 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
#endif
}
if (i + 3 < mInstructions.Size())
{
if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_MUL && mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
mInstructions[i + 2]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 2]->mOperator == IA_ADD &&
mInstructions[i + 2]->mSrc[1].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[1].mFinal &&
mInstructions[i + 3]->mCode == IC_LEA && mInstructions[i + 3]->mSrc[1].mTemp < 0 &&
mInstructions[i + 3]->mSrc[0].mTemp == mInstructions[i + 2]->mDst.mTemp && mInstructions[i + 3]->mSrc[0].mFinal)
{
int d = mInstructions[i + 0]->mSrc[0].mIntConst * mInstructions[i + 1]->mSrc[0].mIntConst;
mInstructions[i + 3]->mSrc[1].mIntConst += d;
mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1];
mInstructions[i + 1]->mDst.mRange.mMinValue -= d; mInstructions[i + 1]->mDst.mRange.mMaxValue -= d;
mInstructions[i + 2]->mSrc[1].mRange.mMinValue -= d; mInstructions[i + 2]->mSrc[1].mRange.mMaxValue -= d;
mInstructions[i + 2]->mDst.mRange.mMinValue -= d; mInstructions[i + 2]->mDst.mRange.mMaxValue -= d;
mInstructions[i + 3]->mSrc[0].mRange.mMinValue -= d; mInstructions[i + 3]->mSrc[0].mRange.mMaxValue -= d;
mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
}
#if 1
@ -11836,6 +11936,53 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
} while (changed);
// Check case of cmp signed immediate
if (mFalseJump && mInstructions.Size() > 3)
{
int nins = mInstructions.Size();
if (mInstructions[nins - 1]->mCode == IC_BRANCH &&
mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[nins - 2]->mDst.mTemp == mInstructions[nins - 1]->mSrc[0].mTemp &&
mInstructions[nins - 2]->mOperator == IA_CMPLS && mInstructions[nins - 2]->mSrc[0].mTemp < 0)
{
int j = nins - 2;
while (j >= 0 && mInstructions[j]->mDst.mTemp != mInstructions[nins - 2]->mSrc[1].mTemp)
j--;
if (j >= 0 && mInstructions[j]->mCode == IC_LOAD_TEMPORARY)
{
int si = mInstructions[j]->mSrc[0].mTemp, di = mInstructions[j]->mDst.mTemp, ioffset = 0;
InterInstruction* ains = nullptr;
int k = j + 1;
while (k < nins - 2)
{
InterInstruction* ins = mInstructions[k];
if (ins->mDst.mTemp == si)
{
if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp == si)
{
ioffset += ins->mSrc[0].mIntConst;
ains = ins;
}
else
break;
}
k++;
}
if (k == nins - 2)
{
if (ains)
{
mInstructions[nins - 2]->mSrc[1] = ains->mDst;
mInstructions[nins - 2]->mSrc[0].mIntConst += ioffset;
}
}
}
}
}
CheckFinalLocal();
if (mTrueJump) mTrueJump->PeepholeOptimization(staticVars);
@ -12363,6 +12510,9 @@ void InterCodeProcedure::PeepholeOptimization(void)
TempForwarding();
RemoveUnusedInstructions();
Disassemble("Precheck Final");
CheckFinal();
ResetVisited();
mEntryBlock->PeepholeOptimization(mModule->mGlobalVars);
}
@ -12751,6 +12901,8 @@ void InterCodeProcedure::EliminateAliasValues()
void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory)
{
DisassembleDebug("Load/Store forwardingY");
bool changed;
do {
GrowingInstructionPtrArray gipa(nullptr);

View File

@ -880,6 +880,80 @@ bool NativeCodeInstruction::SwapXYReg(void)
}
// Make temp live: record a mutual collision between temp and every
// entry that is currently in liveTemps, then add temp to liveTemps.
// Negative temps and temps that are already live are left untouched.
static void UpdateCollisionSet(NumberSet& liveTemps, NumberSet* collisionSets, int temp)
{
	if (temp < 0 || liveTemps[temp])
		return;

	for (int other = 0; other < liveTemps.Size(); other++)
	{
		if (!liveTemps[other])
			continue;

		// collisions are stored symmetrically
		collisionSets[other] += temp;
		collisionSets[temp] += other;
	}

	liveTemps += temp;
}
void NativeCodeInstruction::BuildCollisionTable(NumberSet& liveTemps, NumberSet* collisionSets)
{
if (mMode == ASMIM_ZERO_PAGE)
{
if (ChangesAddress())
liveTemps -= mAddress;
if (UsesAddress())
UpdateCollisionSet(liveTemps, collisionSets, mAddress);
}
if (mMode == ASMIM_INDIRECT_Y)
{
UpdateCollisionSet(liveTemps, collisionSets, mAddress);
UpdateCollisionSet(liveTemps, collisionSets, mAddress + 1);
}
if (mType == ASMIT_JSR)
{
for(int i= BC_REG_ACCU; i< BC_REG_ACCU + 4; i++)
UpdateCollisionSet(liveTemps, collisionSets, i);
for (int i = BC_REG_WORK; i < BC_REG_WORK + 4; i++)
UpdateCollisionSet(liveTemps, collisionSets, i);
if (mFlags & NCIF_RUNTIME)
{
if (mFlags & NCIF_USE_ZP_32_X)
{
for (int i = mParam; i < mParam + 4; i++)
UpdateCollisionSet(liveTemps, collisionSets, i);
}
if (mFlags & NCIF_FEXEC)
{
for (int i = BC_REG_FPARAMS; i < BC_REG_FPARAMS_END; i++)
UpdateCollisionSet(liveTemps, collisionSets, i);
}
}
else
{
for (int i = BC_REG_FPARAMS; i < BC_REG_FPARAMS_END; i++)
UpdateCollisionSet(liveTemps, collisionSets, i);
if (mLinkerObject && mLinkerObject->mProc)
{
for (int i = BC_REG_TMP; i < BC_REG_TMP + mLinkerObject->mProc->mCallerSavedTemps; i++)
UpdateCollisionSet(liveTemps, collisionSets, i);
}
else
{
for (int i = BC_REG_TMP; i < BC_REG_TMP_SAVED; i++)
UpdateCollisionSet(liveTemps, collisionSets, i);
}
}
}
}
bool NativeCodeInstruction::ReplaceXRegWithYReg(void)
{
@ -10714,6 +10788,39 @@ bool NativeCodeBasicBlock::RemoveUnusedResultInstructions(void)
return changed;
}
// Recursively fill collisionSets for this block and all blocks reachable
// through mTrueJump / mFalseJump. Blocks already marked mVisited are skipped.
void NativeCodeBasicBlock::BuildCollisionTable(NumberSet* collisionSets)
{
	if (mVisited)
		return;
	mVisited = true;

	// every register required at exit collides with every other
	// register required at exit (and is entered into its own set)
	const int numRegs = mExitRequiredRegs.Size();
	for (int a = 0; a < numRegs; a++)
	{
		if (!mExitRequiredRegs[a])
			continue;

		for (int b = 0; b < numRegs; b++)
		{
			if (mExitRequiredRegs[b])
				collisionSets[a] += b;
		}
	}

	// walk the instructions from last to first, starting with the
	// exit requirements as the live set
	NumberSet live(mExitRequiredRegs);
	int pos = mIns.Size();
	while (pos > 0)
	{
		pos--;
		mIns[pos].BuildCollisionTable(live, collisionSets);
	}

	if (mTrueJump)
		mTrueJump->BuildCollisionTable(collisionSets);
	if (mFalseJump)
		mFalseJump->BuildCollisionTable(collisionSets);
}
void NativeCodeBasicBlock::BuildDominatorTree(NativeCodeBasicBlock* from)
{
if (from == this)
@ -11370,6 +11477,39 @@ bool NativeCodeBasicBlock::ReduceLocalYPressure(void)
CheckLive();
#if 1
if (mLoopHead && mFalseJump && !mEntryRequiredRegs[CPU_REG_X] && !mExitRequiredRegs[CPU_REG_X] && mEntryBlocks.Size() == 2 && (mFalseJump == this || mTrueJump == this))
{
NativeCodeBasicBlock* pblock, * nblock;
if (mTrueJump == this)
nblock = mFalseJump;
else
nblock = mTrueJump;
if (mEntryBlocks[0] == this)
pblock = mEntryBlocks[1];
else
pblock = mEntryBlocks[0];
if (!pblock->mFalseJump && !nblock->mEntryRequiredRegs[CPU_REG_Y])
{
int pz = pblock->mIns.Size();
if (mEntryRequiredRegs[CPU_REG_Y] && pz > 0 && pblock->mIns[pz - 1].mType == ASMIT_LDY && pblock->mIns[pz - 1].mMode == ASMIM_IMMEDIATE)
{
if (CanReplaceYRegWithXReg(0, mIns.Size()))
{
mEntryRequiredRegs += CPU_REG_X; mEntryRequiredRegs -= CPU_REG_Y;
mExitRequiredRegs += CPU_REG_X; mExitRequiredRegs -= CPU_REG_Y;
pblock->mExitRequiredRegs += CPU_REG_X; pblock->mExitRequiredRegs -= CPU_REG_Y;
ReplaceYRegWithXReg(0, mIns.Size());
pblock->mIns[pz - 1].mType = ASMIT_LDX;
changed = true;
}
}
}
}
#endif
int start = 0;
while (start < mIns.Size())
@ -14207,6 +14347,32 @@ bool NativeCodeBasicBlock::HasTailSTX(int& addr, int& index) const
return false;
}
// Search backward from the end of the block for a zero page STY that is
// reached before any instruction changing Y. On a hit, addr and index
// receive the address and position of the store; the result is true only
// if no later instruction in the block references that zero page address.
bool NativeCodeBasicBlock::HasTailSTY(int& addr, int& index) const
{
	for (int pos = mIns.Size() - 1; pos >= 0; pos--)
	{
		if (mIns[pos].ChangesYReg())
			return false;

		if (mIns[pos].mType != ASMIT_STY || mIns[pos].mMode != ASMIM_ZERO_PAGE)
			continue;

		index = pos;
		addr = mIns[pos].mAddress;

		// the stored location must not be touched by the remaining tail
		for (int later = pos + 1; later < mIns.Size(); later++)
		{
			if (mIns[later].ReferencesZeroPage(addr))
				return false;
		}
		return true;
	}

	return false;
}
void NativeCodeBasicBlock::AddEntryBlock(NativeCodeBasicBlock* block)
{
@ -14397,7 +14563,7 @@ bool NativeCodeBasicBlock::PropagateSinglePath(void)
#endif
if (mTrueJump && mFalseJump)
if (mTrueJump && mFalseJump && mExitRequiredRegs.Size())
{
uint32 live = 0;
if (mExitRequiredRegs[CPU_REG_X])
@ -14871,6 +15037,27 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool
}
}
if (eb->HasTailSTY(addr, index))
{
i = 1;
while (i < mEntryBlocks.Size() && mEntryBlocks[i]->HasTailSTY(taddr, tindex) && taddr == addr)
i++;
if (i == mEntryBlocks.Size())
{
mIns.Insert(0, eb->mIns[index]);
mIns[0].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_X | LIVE_CPU_REG_Y;
for (int i = 0; i < mEntryBlocks.Size(); i++)
{
NativeCodeBasicBlock* b = mEntryBlocks[i];
b->HasTailSTY(taddr, tindex);
for (int j = tindex + 1; j < b->mIns.Size(); j++)
b->mIns[j].mLive |= LIVE_CPU_REG_Y;
b->mIns.Remove(tindex);
}
changed = true;
}
}
break;
}
}
@ -15509,6 +15696,227 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool
return changed;
}
void NativeCodeBasicBlock::DoCrossBlockAShortcut(int addr)
{
mExitRequiredRegs += CPU_REG_A;
int i = mIns.Size();
while (i > 0)
{
i--;
mIns[i].mLive |= LIVE_CPU_REG_A;
if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr)
return;
}
}
// Scan backward from the end of the block. Returns true when a STA to
// zero page addr is found before any instruction that changes the
// accumulator or references addr in another way; false otherwise.
bool NativeCodeBasicBlock::CanCrossBlockAShortcut(int addr)
{
	for (int pos = mIns.Size() - 1; pos >= 0; pos--)
	{
		if (mIns[pos].ChangesAccu())
			return false;

		const bool isStore = mIns[pos].mType == ASMIT_STA && mIns[pos].mMode == ASMIM_ZERO_PAGE && mIns[pos].mAddress == addr;
		if (isStore)
			return true;

		if (mIns[pos].ReferencesZeroPage(addr))
			return false;
	}

	return false;
}
// Keep X alive up to the end of this block: X is added to the exit
// requirements and marked live from the end of the block back to the
// last store to zero page addr. If that store is a STA, a TAX is
// inserted in front of it.
void NativeCodeBasicBlock::DoCrossBlockXShortcut(int addr)
{
	mExitRequiredRegs += CPU_REG_X;

	for (int pos = mIns.Size() - 1; pos >= 0; pos--)
	{
		mIns[pos].mLive |= LIVE_CPU_REG_X;

		if (mIns[pos].mMode != ASMIM_ZERO_PAGE || mIns[pos].mAddress != addr)
			continue;

		if (mIns[pos].mType == ASMIT_STX)
			return;

		if (mIns[pos].mType == ASMIT_STA)
		{
			mIns.Insert(pos, NativeCodeInstruction(ASMIT_TAX));
			return;
		}
	}
}
// Scan backward from the end of the block. Returns true when a STX to
// zero page addr, or a STA to it whose Z flag is not live, is found
// before any instruction that changes X or references addr in another
// way; false otherwise.
bool NativeCodeBasicBlock::CanCrossBlockXShortcut(int addr)
{
	for (int pos = mIns.Size() - 1; pos >= 0; pos--)
	{
		if (mIns[pos].ChangesXReg())
			return false;

		if (mIns[pos].mMode == ASMIM_ZERO_PAGE && mIns[pos].mAddress == addr)
		{
			if (mIns[pos].mType == ASMIT_STX)
				return true;
			if (mIns[pos].mType == ASMIT_STA && !(mIns[pos].mLive & LIVE_CPU_REG_Z))
				return true;
		}

		if (mIns[pos].ReferencesZeroPage(addr))
			return false;
	}

	return false;
}
// Keep Y alive up to the end of this block: Y is added to the exit
// requirements and marked live from the end of the block back to the
// last store to zero page addr. If that store is a STA, a TAY is
// inserted in front of it.
void NativeCodeBasicBlock::DoCrossBlockYShortcut(int addr)
{
	mExitRequiredRegs += CPU_REG_Y;

	for (int pos = mIns.Size() - 1; pos >= 0; pos--)
	{
		mIns[pos].mLive |= LIVE_CPU_REG_Y;

		if (mIns[pos].mMode != ASMIM_ZERO_PAGE || mIns[pos].mAddress != addr)
			continue;

		if (mIns[pos].mType == ASMIT_STY)
			return;

		if (mIns[pos].mType == ASMIT_STA)
		{
			mIns.Insert(pos, NativeCodeInstruction(ASMIT_TAY));
			return;
		}
	}
}
// Scan backward from the end of the block. Returns true when a STY to
// zero page addr, or a STA to it whose Z flag is not live, is found
// before any instruction that changes Y or references addr in another
// way; false otherwise.
bool NativeCodeBasicBlock::CanCrossBlockYShortcut(int addr)
{
	for (int pos = mIns.Size() - 1; pos >= 0; pos--)
	{
		if (mIns[pos].ChangesYReg())
			return false;

		if (mIns[pos].mMode == ASMIM_ZERO_PAGE && mIns[pos].mAddress == addr)
		{
			if (mIns[pos].mType == ASMIT_STY)
				return true;
			if (mIns[pos].mType == ASMIT_STA && !(mIns[pos].mLive & LIVE_CPU_REG_Z))
				return true;
		}

		if (mIns[pos].ReferencesZeroPage(addr))
			return false;
	}

	return false;
}
// Remove a zero page reload of A, X or Y at the start of a join block
// when every entry block still holds the value in that register.
//
// For each of the three registers that is not required on entry, the
// first instruction changing the register is inspected. If it is a zero
// page load whose Z flag is not live, no earlier instruction of this
// block references the address, and all entry blocks pass the matching
// CanCrossBlock?Shortcut() test, the entry blocks are patched with
// DoCrossBlock?Shortcut(), the load becomes a NOP and the register is
// marked live from the block entry up to the former load.
//
// Loop heads and blocks with fewer than two entry blocks are left alone.
// The function recurses through mTrueJump / mFalseJump and relies on
// mVisited to process each block once. Returns true if anything changed.
bool NativeCodeBasicBlock::CrossBlockXYShortcut(void)
{
	bool changed = false;

	if (!mVisited)
	{
		mVisited = true;

		if (!mLoopHead && mEntryBlocks.Size() > 1)
		{
			CheckLive();

			// a register is a candidate as long as it is not needed on
			// entry and has not been written inside this block yet
			bool xvalid = !mEntryRequiredRegs[CPU_REG_X];
			bool yvalid = !mEntryRequiredRegs[CPU_REG_Y];
			bool avalid = !mEntryRequiredRegs[CPU_REG_A];

			int i = 0;
			while (i < mIns.Size() && (xvalid || yvalid || avalid))
			{
				// The accumulator has its own flag; guarding this section
				// with xvalid (and clearing xvalid) would switch off the X
				// shortcut as soon as A is written.
				if (avalid && mIns[i].ChangesAccu())
				{
					if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
					{
						const int addr = mIns[i].mAddress;

						// the address must not be referenced before the load
						int k = i;
						while (k > 0 && !mIns[k - 1].ReferencesZeroPage(addr))
							k--;
						if (k == 0)
						{
							int e = 0;
							while (e < mEntryBlocks.Size() && mEntryBlocks[e]->CanCrossBlockAShortcut(addr))
								e++;
							if (e == mEntryBlocks.Size())
							{
								for (int b = 0; b < mEntryBlocks.Size(); b++)
									mEntryBlocks[b]->DoCrossBlockAShortcut(addr);
								changed = true;
								mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED;
								mEntryRequiredRegs += CPU_REG_A;
								for (int p = 0; p < i; p++)
									mIns[p].mLive |= LIVE_CPU_REG_A;
							}
						}
					}
					avalid = false;
				}

				if (xvalid && mIns[i].ChangesXReg())
				{
					if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
					{
						const int addr = mIns[i].mAddress;

						int k = i;
						while (k > 0 && !mIns[k - 1].ReferencesZeroPage(addr))
							k--;
						if (k == 0)
						{
							int e = 0;
							while (e < mEntryBlocks.Size() && mEntryBlocks[e]->CanCrossBlockXShortcut(addr))
								e++;
							if (e == mEntryBlocks.Size())
							{
								for (int b = 0; b < mEntryBlocks.Size(); b++)
									mEntryBlocks[b]->DoCrossBlockXShortcut(addr);
								changed = true;
								mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED;
								mEntryRequiredRegs += CPU_REG_X;
								for (int p = 0; p < i; p++)
									mIns[p].mLive |= LIVE_CPU_REG_X;
							}
						}
					}
					xvalid = false;
				}

				if (yvalid && mIns[i].ChangesYReg())
				{
					if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
					{
						const int addr = mIns[i].mAddress;

						int k = i;
						while (k > 0 && !mIns[k - 1].ReferencesZeroPage(addr))
							k--;
						if (k == 0)
						{
							int e = 0;
							while (e < mEntryBlocks.Size() && mEntryBlocks[e]->CanCrossBlockYShortcut(addr))
								e++;
							if (e == mEntryBlocks.Size())
							{
								for (int b = 0; b < mEntryBlocks.Size(); b++)
									mEntryBlocks[b]->DoCrossBlockYShortcut(addr);
								changed = true;
								mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED;
								mEntryRequiredRegs += CPU_REG_Y;
								for (int p = 0; p < i; p++)
									mIns[p].mLive |= LIVE_CPU_REG_Y;
							}
						}
					}
					yvalid = false;
				}

				i++;
			}

			CheckLive();
		}

		if (mTrueJump && mTrueJump->CrossBlockXYShortcut())
			changed = true;
		if (mFalseJump && mFalseJump->CrossBlockXYShortcut())
			changed = true;
	}

	return changed;
}
bool NativeCodeBasicBlock::FindPageStartAddress(int at, int reg, int& addr)
{
int j = at - 2;
@ -19589,8 +19997,6 @@ bool NativeCodeBasicBlock::GlobalValueForwarding(void)
mIns.Insert(i + 1, NativeCodeInstruction(carryop));
}
CheckLive();
if (this->mTrueJump && this->mTrueJump->GlobalValueForwarding())
changed = true;
if (this->mFalseJump && this->mFalseJump->GlobalValueForwarding())
@ -19960,6 +20366,14 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
lblock->mTrueJump = lblock;
lblock->mFalseJump = eblock;
lblock->mEntryRequiredRegs = mEntryRequiredRegs;
lblock->mExitRequiredRegs = mExitRequiredRegs;
eblock->mEntryRequiredRegs = mExitRequiredRegs;
eblock->mExitRequiredRegs = mExitRequiredRegs;
mExitRequiredRegs = mEntryRequiredRegs;
for (int i = 0; i < mIns.Size(); i++)
lblock->mIns.Push(mIns[i]);
@ -20679,6 +21093,30 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
}
}
if (mEntryRequiredRegs.Size() && !mEntryRequiredRegs[CPU_REG_A])
{
for (int i = 0; i + 1 < mIns.Size(); i++)
{
if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE)
{
int j = 0;
while (j < mIns.Size() &&
(j == i + 0 || !mIns[j].ChangesZeroPage(mIns[i + 0].mAddress)) &&
(j == i + 1 || !mIns[j].ChangesZeroPage(mIns[i + 1].mAddress)))
j++;
if (j == mIns.Size())
{
if (!prevBlock)
return OptimizeSimpleLoopInvariant(proc);
prevBlock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, mIns[i + 0].mAddress));
prevBlock->mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, mIns[i + 1].mAddress));
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
}
}
}
}
CheckLive();
return changed;
@ -23658,7 +24096,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
//
// move ldx/y down
for (int i = 0; i + 2 < mIns.Size(); i++)
for (int i = 0; i + 1 < mIns.Size(); i++)
{
#if 1
if (mIns[i].mType == ASMIT_LDY)
@ -27014,6 +27452,22 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true;
}
#endif
#if 1
else if (
mIns[i + 0].mType == ASMIT_LDA &&
mIns[i + 1].mType == ASMIT_ASL && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 2].mType == ASMIT_ROL && mIns[i + 2].mMode == ASMIM_IMPLIED && HasAsmInstructionMode(ASMIT_ROL, mIns[i + 0].mMode) &&
mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].SameEffectiveAddress(mIns[i + 0]) && !(mIns[i + 3].mLive & LIVE_CPU_REG_A))
{
mIns[i + 2].CopyMode(mIns[i + 0]);
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED;
progress = true;
}
#endif
else if (
mIns[i + 0].ChangesAccuAndFlag() &&
mIns[i + 1].mType == ASMIT_STA &&
@ -28929,6 +29383,15 @@ void NativeCodeProcedure::CompressTemporaries(void)
int tpos = BC_REG_TMP + mInterProc->mFreeCallerSavedTemps;
int spos = BC_REG_TMP_SAVED;
NumberSet * collisionSet = new NumberSet[NUM_REGS];
for (int i = 0; i < NUM_REGS; i++)
collisionSet[i].Reset(NUM_REGS);
ResetVisited();
mEntryBlock->BuildCollisionTable(collisionSet);
// for (int tsize = 4; tsize > 0; tsize >>= 1)
{
for (int i = 0; i < mInterProc->mTempOffset.Size(); i++)
@ -28951,19 +29414,52 @@ void NativeCodeProcedure::CompressTemporaries(void)
if (usize)
{
int pos = spos;
if (tpos + usize <= BC_REG_TMP + mInterProc->mCallerSavedTemps)
#if 1
if (mInterProc->mLeafProcedure)
{
pos = tpos;
tpos += usize;
int k = 0;
while (k < usize && !collisionSet[k + BC_REG_ACCU][k + reg])
k++;
if (k == usize)
{
pos = BC_REG_ACCU;
for (int i = 0; i < 256; i++)
{
for (int j = 0; j < usize; j++)
{
if (collisionSet[j + reg][i])
{
collisionSet[j + BC_REG_ACCU] += i;
collisionSet[i] += j + BC_REG_ACCU;
}
}
}
}
}
#endif
if (pos == spos)
{
if (tpos + usize <= BC_REG_TMP + mInterProc->mCallerSavedTemps)
{
pos = tpos;
tpos += usize;
}
else
spos += usize;
mInterProc->mTempOffset[i] = pos - BC_REG_TMP;
mInterProc->mTempSizes[i] = usize;
}
else
spos += usize;
{
mInterProc->mTempOffset[i] = 0;
mInterProc->mTempSizes[i] = 0;
}
for (int j = 0; j < usize; j++)
remap[reg + j] = pos + j;
mInterProc->mTempOffset[i] = pos - BC_REG_TMP;
mInterProc->mTempSizes[i] = usize;
}
else
{
@ -28975,6 +29471,8 @@ void NativeCodeProcedure::CompressTemporaries(void)
}
}
delete[] collisionSet;
mInterProc->mCallerSavedTemps = tpos - BC_REG_TMP;
ResetVisited();
@ -29777,6 +30275,13 @@ void NativeCodeProcedure::Optimize(void)
if (!changed && mEntryBlock->ShortcutZeroPageCopyUp(this))
changed = true;
#endif
#if 1
ResetVisited();
if (!changed && mEntryBlock->CrossBlockXYShortcut())
changed = true;
#endif
}
#endif

View File

@ -147,6 +147,9 @@ public:
bool CanSwapXYReg(void);
bool SwapXYReg(void);
void BuildCollisionTable(NumberSet& liveTemps, NumberSet* collisionSets);
};
class NativeCodeBasicBlock
@ -270,6 +273,8 @@ public:
bool BuildGlobalRequiredRegSet(NumberSet& fromRequiredTemps);
bool RemoveUnusedResultInstructions(void);
void BuildCollisionTable(NumberSet* collisionSets);
bool IsSame(const NativeCodeBasicBlock* block) const;
bool FindSameBlocks(NativeCodeProcedure* nproc);
bool MergeSameBlocks(NativeCodeProcedure* nproc);
@ -365,11 +370,23 @@ public:
bool SameTail(const NativeCodeInstruction& ins) const;
bool HasTailSTA(int& addr, int& index) const;
bool HasTailSTX(int& addr, int& index) const;
bool HasTailSTY(int& addr, int& index) const;
bool PropagateSinglePath(void);
bool CanChangeTailZPStoreToX(int addr, const NativeCodeBasicBlock * nblock, const NativeCodeBasicBlock* fblock = nullptr) const;
void ChangeTailZPStoreToX(int addr);
bool CanCrossBlockAShortcut(int addr);
void DoCrossBlockAShortcut(int addr);
bool CanCrossBlockXShortcut(int addr);
void DoCrossBlockXShortcut(int addr);
bool CanCrossBlockYShortcut(int addr);
void DoCrossBlockYShortcut(int addr);
bool CrossBlockXYShortcut(void);
bool Check16BitSum(int at, NativeRegisterSum16Info& info);
bool Propagate16BitSum(void);