Improve outer loop optimizations

This commit is contained in:
drmortalwombat 2023-07-28 18:51:29 +02:00
parent 4c5a9c3b70
commit 02e7f15810
8 changed files with 201 additions and 20 deletions

View File

@ -1,11 +1,8 @@
#include "memmap.h"
volatile char PLAShadow;
__asm DoneTrampoline
{
lda PLAShadow
sta $01
stx $01
pla
tax
pla
@ -17,8 +14,6 @@ __asm IRQTrampoline
pha
txa
pha
lda #$36
sta $01
lda #>DoneTrampoline
pha
@ -27,6 +22,9 @@ __asm IRQTrampoline
tsx
lda $0105, x
pha
ldx $01
lda #$36
sta $01
jmp ($fffe)
}
@ -35,8 +33,6 @@ __asm NMITrampoline
pha
txa
pha
lda #$36
sta $01
lda #>DoneTrampoline
pha
@ -45,6 +41,9 @@ __asm NMITrampoline
tsx
lda $0105, x
pha
ldx $01
lda #$36
sta $01
jmp ($fffa)
}
@ -56,8 +55,9 @@ void mmap_trampoline(void)
#pragma native(mmap_trampoline)
void mmap_set(char pla)
char mmap_set(char pla)
{
PLAShadow = pla;
char ppla = *((char *)0x01);
*((volatile char *)0x01) = pla;
return ppla;
}

View File

@ -18,9 +18,9 @@
void mmap_trampoline(void);
// Set the memory map in a way that is compatible with the IRQ
// trampoline
// trampoline, returns the previous state
inline void mmap_set(char pla);
inline char mmap_set(char pla);
#pragma compile("memmap.c")

View File

@ -160,7 +160,7 @@ wx1:
jmp lx2
w0:
lda #$3f
lda #$2f
sta $00
lda #$36
sta $01

View File

@ -1285,7 +1285,7 @@ bool Declaration::IsSame(const Declaration* dec) const
return mIdent == dec->mIdent;
else if (mType == DT_TYPE_POINTER || mType == DT_TYPE_ARRAY)
{
if (mBase->mType == DT_TYPE_STRUCT && dec->mBase->mType == DT_TYPE_STRUCT)
if (mBase->mType == DT_TYPE_STRUCT && dec->mBase->mType == DT_TYPE_STRUCT && mBase->mStripe == dec->mBase->mStripe)
{
if (mBase->mQualIdent == dec->mBase->mQualIdent &&
(mBase->mFlags & (DTF_CONST | DTF_VOLATILE)) == (dec->mBase->mFlags & (DTF_CONST | DTF_VOLATILE)))
@ -1373,7 +1373,7 @@ bool Declaration::CanAssign(const Declaration* fromType) const
{
if (mBase->mType == DT_TYPE_VOID || fromType->mBase->mType == DT_TYPE_VOID)
return (mBase->mFlags & DTF_CONST) || !(fromType->mBase->mFlags & DTF_CONST);
else if (mBase->IsSubType(fromType->mBase))
else if (mBase->mStripe == fromType->mBase->mStripe && mBase->IsSubType(fromType->mBase))
return true;
}
else if (mBase->mType == DT_TYPE_FUNCTION && fromType->mType == DT_TYPE_FUNCTION)

View File

@ -530,6 +530,18 @@ bool InterCodeBasicBlock::DestroyingMem(const InterInstruction* lins, const Inte
return false;
}
// Range variant of DestroyingMem: reports whether the two-instruction
// DestroyingMem overload returns true for lins paired with any instruction
// of block at an index in [from, to).  Instructions are examined in
// ascending index order and the scan stops at the first hit.
bool InterCodeBasicBlock::DestroyingMem(InterCodeBasicBlock* block, InterInstruction* lins, int from, int to) const
{
	int at = from;

	// Advance past every instruction that does not destroy the memory of lins
	while (at < to && !DestroyingMem(lins, block->mInstructions[at]))
		at++;

	// Stopping before the end of the range means a destroying instruction was found
	return at < to;
}
static bool SameMem(const InterOperand& op1, const InterOperand& op2)
{
if (op1.mMemory != op2.mMemory || op1.mType != op2.mType || op1.mIntConst != op2.mIntConst)
@ -8036,7 +8048,7 @@ void InterCodeBasicBlock::PerformTempForwarding(const TempForwardingTable& forwa
{
if (mEntryBlocks[i] != mLoopPrefix)
{
if (!mEntryBlocks[i]->CollectLoopBody(this, body))
if (!mEntryBlocks[i]->CollectLoopBodyRecursive(this, body))
innerLoop = false;
}
}
@ -11578,6 +11590,10 @@ void InterCodeBasicBlock::BuildLoopSuffix(void)
if (mFalseJump->mNumEntries > 1)
{
InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc);
suffix->mEntryRequiredTemps = mFalseJump->mEntryRequiredTemps;
suffix->mExitRequiredTemps = mFalseJump->mEntryRequiredTemps;
suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size());
InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
suffix->Append(jins);
suffix->Close(mFalseJump, nullptr);
@ -11590,6 +11606,10 @@ void InterCodeBasicBlock::BuildLoopSuffix(void)
if (mTrueJump->mNumEntries > 1)
{
InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc);
suffix->mEntryRequiredTemps = mTrueJump->mEntryRequiredTemps;
suffix->mExitRequiredTemps = mTrueJump->mEntryRequiredTemps;
suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size());
InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
suffix->Append(jins);
suffix->Close(mTrueJump, nullptr);
@ -11620,6 +11640,10 @@ InterCodeBasicBlock* InterCodeBasicBlock::BuildLoopPrefix(void)
if (mLoopHead)
{
mLoopPrefix = new InterCodeBasicBlock(mProc);
mLoopPrefix->mEntryRequiredTemps = mEntryRequiredTemps;
mLoopPrefix->mExitRequiredTemps = mEntryRequiredTemps;
mLoopPrefix->mLocalModifiedTemps.Reset(mEntryRequiredTemps.Size());
InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
mLoopPrefix->Append(jins);
mLoopPrefix->Close(this, nullptr);
@ -11645,6 +11669,22 @@ bool InterCodeBasicBlock::CollectLoopBody(InterCodeBasicBlock* head, GrowingArra
return true;
}
// Adds this block to body and then recurses into every block listed in
// mEntryBlocks.  The walk ends at head (which is not added to body) and at
// blocks that are already contained in body.  Returns false only if one of
// the recursive calls returns false; the remaining entry blocks are then
// not visited.
bool InterCodeBasicBlock::CollectLoopBodyRecursive(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*>& body)
{
	// Nothing to collect for the head itself or for a block seen before
	if (this == head || body.IndexOf(this) != -1)
		return true;

	body.Push(this);

	bool complete = true;
	for (int k = 0; complete && k < mEntryBlocks.Size(); k++)
		complete = mEntryBlocks[k]->CollectLoopBodyRecursive(head, body);

	return complete;
}
void InterCodeBasicBlock::CollectLoopPath(const GrowingArray<InterCodeBasicBlock*>& body, GrowingArray<InterCodeBasicBlock*>& path)
{
if (body.IndexOf(this) >= 0)
@ -11872,6 +11912,17 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
}
}
#endif
bool hasStore = false;
for (int j = 0; j < body.Size(); j++)
{
int sz = body[j]->mInstructions.Size();
for (int i = 0; i < sz; i++)
{
InterInstruction* ins = body[j]->mInstructions[i];
if (IsObservable(ins->mCode))
hasStore = true;
}
}
int i = 0;
while (i < mInstructions.Size())
@ -11880,7 +11931,8 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
if (lins->mCode == IC_BINARY_OPERATOR || lins->mCode == IC_CONSTANT || lins->mCode == IC_UNARY_OPERATOR ||
lins->mCode == IC_CONVERSION_OPERATOR || lins->mCode == IC_SELECT ||
lins->mCode == IC_RELATIONAL_OPERATOR)
lins->mCode == IC_LEA ||
lins->mCode == IC_RELATIONAL_OPERATOR || (lins->mCode == IC_LOAD && !hasStore && !lins->mVolatile))
{
#if 1
if (CanMoveInstructionBeforeBlock(i) && !IsInsModifiedInRange(i + 1, mInstructions.Size(), lins) && !tail->IsInsModified(lins) && !lins->UsesTemp(lins->mDst.mTemp))
@ -11901,7 +11953,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
}
#endif
}
else if (lins->mCode == IC_LOAD && lins->mSrc[0].mTemp < 0 && !tail->mExitRequiredTemps[lins->mDst.mTemp])
else if (lins->mCode == IC_LOAD && !lins->mVolatile && lins->mSrc[0].mTemp < 0 && !tail->mExitRequiredTemps[lins->mDst.mTemp])
{
if (CanMoveInstructionBeforeBlock(i))
{
@ -11963,8 +12015,36 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
}
}
}
else
{
int k = 0;
while (k < body.Size() && !DestroyingMem(body[k], lins, 0, body[k]->mInstructions.Size()))
k++;
if (k == body.Size())
{
#if 1
if (!IsInsModifiedInRange(i + 1, mInstructions.Size(), lins) && !tail->IsInsModified(lins))
{
int j = 1;
while (j < body.Size() && !body[j]->IsInsModified(lins))
j++;
if (j == body.Size())
{
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
mEntryRequiredTemps += lins->mDst.mTemp;
mInstructions.Remove(i);
i--;
modified = true;
}
}
#endif
}
}
}
}
i++;
}
}

View File

@ -502,6 +502,7 @@ public:
bool IsTempReferencedOnPath(int temp, int at) const;
bool DestroyingMem(const InterInstruction* lins, const InterInstruction* sins) const;
bool DestroyingMem(InterCodeBasicBlock* block, InterInstruction* lins, int from, int to) const;
bool CollidingMem(const InterInstruction* ins1, const InterInstruction* ins2) const;
bool CollidingMem(const InterOperand& op, InterType type, const InterInstruction* ins) const;
bool CollidingMem(const InterOperand& op1, InterType type1, const InterOperand& op2, InterType type2) const;
@ -542,6 +543,7 @@ public:
bool SingleBlockLoopPointerSplit(int& spareTemps);
bool SingleBlockLoopPointerToByte(int& spareTemps);
bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*> & body);
bool CollectLoopBodyRecursive(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*>& body);
void CollectLoopPath(const GrowingArray<InterCodeBasicBlock*>& body, GrowingArray<InterCodeBasicBlock*>& path);
void InnerLoopOptimization(const NumberSet& aliasedParams);
void PushMoveOutOfLoop(void);

View File

@ -2403,6 +2403,16 @@ void NativeCodeInstruction::Simulate(NativeRegisterDataSet& data)
}
}
break;
case ASMIT_CLC:
data.mRegs[CPU_REG_C].mValue = 0;
data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
break;
case ASMIT_SEC:
data.mRegs[CPU_REG_C].mValue = 1;
data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
break;
}
}
@ -12978,10 +12988,26 @@ void NativeCodeBasicBlock::BuildEntryDataSet(const NativeRegisterDataSet& set)
for (int i = 0; i < mIns.Size(); i++)
mIns[i].Simulate(mNDataSet);
mFDataSet = mNDataSet;
if (mBranch == ASMIT_BCC)
{
mNDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
mNDataSet.mRegs[CPU_REG_C].mValue = 0;
mFDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
mFDataSet.mRegs[CPU_REG_C].mValue = 1;
}
else if (mBranch == ASMIT_BCS)
{
mNDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
mNDataSet.mRegs[CPU_REG_C].mValue = 1;
mFDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
mFDataSet.mRegs[CPU_REG_C].mValue = 0;
}
if (mTrueJump)
mTrueJump->BuildEntryDataSet(mNDataSet);
if (mFalseJump)
mFalseJump->BuildEntryDataSet(mNDataSet);
mFalseJump->BuildEntryDataSet(mFDataSet);
}
}
@ -15889,6 +15915,10 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc)
{
mVisited = true;
int carry = -1;
if (mEntryRegisterDataSet.mRegs[CPU_REG_C].mMode == NRDM_IMMEDIATE)
carry = mEntryRegisterDataSet.mRegs[CPU_REG_C].mValue;
for (int i = 0; i < mIns.Size(); i++)
{
if (i + 2 < mIns.Size() &&
@ -16229,6 +16259,45 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc)
break;
}
if (carry == 0 &&
mIns[i + 0].mType == ASMIT_LDA &&
mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 1 &&
mIns[i + 2].mType == ASMIT_STA && mIns[i + 0].SameEffectiveAddress(mIns[i + 2]) &&
mIns[i + 3].mType == ASMIT_LDA &&
mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_IMMEDIATE && mIns[i + 4].mAddress == 0 &&
mIns[i + 5].mType == ASMIT_STA && mIns[i + 3].SameEffectiveAddress(mIns[i + 5]) &&
HasAsmInstructionMode(ASMIT_INC, mIns[i + 2].mMode) &&
HasAsmInstructionMode(ASMIT_INC, mIns[i + 5].mMode) &&
!(mIns[i + 5].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z)))
{
changed = true;
NativeCodeBasicBlock* iblock = proc->AllocateBlock();
NativeCodeBasicBlock* fblock = proc->AllocateBlock();
fblock->mTrueJump = mTrueJump;
fblock->mFalseJump = mFalseJump;
fblock->mBranch = mBranch;
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
mIns[i + 2].mType = ASMIT_INC; mIns[i + 2].mLive |= LIVE_CPU_REG_Z;
for (int j = i + 6; j < mIns.Size(); j++)
fblock->mIns.Push(mIns[j]);
iblock->mIns.Push(mIns[i + 5]);
mIns.SetSize(i + 3);
iblock->mIns[0].mType = ASMIT_INC;
iblock->mTrueJump = fblock;
iblock->mBranch = ASMIT_JMP;
mTrueJump = fblock;
mFalseJump = iblock;
mBranch = ASMIT_BNE;
break;
}
if (mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_LDA &&
mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 0xff &&
@ -17027,6 +17096,13 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc)
}
}
// Track the known state of the carry flag after instruction i for the
// carry dependent patterns of this loop: 0 after CLC, 1 after SEC and
// -1 (unknown) after any other instruction that changes the carry.
// SEC must be recorded as 1; recording it as 0 would let the
// "carry == 0" ADC-to-INC rewrite fire while the carry is actually set.
if (mIns[i].mType == ASMIT_CLC)
	carry = 0;
else if (mIns[i].mType == ASMIT_SEC)
	carry = 1;
else if (mIns[i].ChangesCarry())
	carry = -1;
}
#if 1
@ -18369,6 +18445,19 @@ bool NativeCodeBasicBlock::PropagateSinglePath(void)
changed = true;
}
}
if (mTrueJump->mNumEntries == 1 && mTrueJump->mIns.Size() > 0 && mTrueJump->mIns[0].mType == ASMIT_TAX && !mFalseJump->mEntryRequiredRegs[CPU_REG_A] && !mFalseJump->mEntryRequiredRegs[CPU_REG_X] && !(mTrueJump->mIns[0].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
{
int sz = mIns.Size();
if (sz >= 2 && mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 1].mType == ASMIT_CMP && HasAsmInstructionMode(ASMIT_LDX, mIns[sz - 2].mMode) && HasAsmInstructionMode(ASMIT_CPX, mIns[sz - 1].mMode))
{
mIns[sz - 2].mType = ASMIT_LDX; mIns[sz - 2].mLive |= LIVE_CPU_REG_X;
mIns[sz - 1].mType = ASMIT_CPX; mIns[sz - 1].mLive |= LIVE_CPU_REG_X;
mExitRequiredRegs += CPU_REG_X;
mTrueJump->mEntryRequiredRegs += CPU_REG_X;
mTrueJump->mIns[0].mType = ASMIT_NOP;
changed = true;
}
}
}
#endif
@ -40404,7 +40493,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
{
mInterProc = proc;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "gauge_show");
CheckFunc = !strcmp(mInterProc->mIdent->mString, "test");
int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks];
@ -41612,6 +41701,10 @@ void NativeCodeProcedure::Optimize(void)
#if 1
if (step >= 7)
{
ResetVisited();
NativeRegisterDataSet data;
mEntryBlock->BuildEntryDataSet(data);
ResetVisited();
if (mEntryBlock->ExpandADCToBranch(this))
changed = true;

View File

@ -609,6 +609,12 @@ public:
bool CheckPatchFailLoop(const NativeCodeBasicBlock* block, const NativeCodeBasicBlock* head, int reg, bool changed);
// reg : base register pair to replace
// index: index register
// at : start position in block
// yval: known y immediate value, or -1 if not known
// lobj: linker object addressed
// address: offset into linker object
bool CheckGlobalAddressSumYPointer(const NativeCodeBasicBlock * block, int reg, int index, int at, int yval);
bool PatchGlobalAddressSumYPointer(const NativeCodeBasicBlock* block, int reg, int index, int at, int yval, LinkerObject * lobj, int address, uint32 flags = NCIF_LOWER | NCIF_UPPER);