Fix reordering of function calls

This commit is contained in:
drmortalwombat 2022-06-30 20:28:36 +02:00
parent fdcaf54666
commit ba661759fb
5 changed files with 175 additions and 57 deletions

View File

@ -869,6 +869,22 @@ void ValueSet::InsertValue(InterInstruction * ins)
mInstructions[mNum++] = ins;
}
// Reports whether `code` belongs to the set this file classifies as having
// side effects: IC_CALL, IC_CALL_NATIVE or IC_ASSEMBLER.
static bool HasSideEffect(InterCode code)
{
	switch (code)
	{
	case IC_CALL:
	case IC_CALL_NATIVE:
	case IC_ASSEMBLER:
		return true;
	default:
		return false;
	}
}
// Reports whether an instruction with the given code counts as moveable.
// Returns false for every code flagged by HasSideEffect, for the copy/store
// codes, the branch code, the frame push/pop codes and all three return
// codes; returns true for anything else.
static bool IsMoveable(InterCode code)
{
	// Anything with side effects stays where it is.
	if (HasSideEffect(code))
		return false;

	switch (code)
	{
	// Memory writes, control flow and frame management
	case IC_COPY:
	case IC_STRCPY:
	case IC_STORE:
	case IC_BRANCH:
	case IC_POP_FRAME:
	case IC_PUSH_FRAME:
	// Function exits
	case IC_RETURN:
	case IC_RETURN_STRUCT:
	case IC_RETURN_VALUE:
		return false;
	default:
		return true;
	}
}
static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction* bins)
{
// Check ambiguity
@ -922,6 +938,9 @@ static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction*
static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins)
{
if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode))
return false;
if (lins->mDst.mTemp >= 0)
{
if (lins->mDst.mTemp == bins->mDst.mTemp)
@ -948,6 +967,9 @@ static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins
static bool CanBypassUp(const InterInstruction* lins, const InterInstruction* bins)
{
if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode))
return false;
if (lins->mDst.mTemp >= 0)
{
if (lins->mDst.mTemp == bins->mDst.mTemp)
@ -2757,22 +2779,6 @@ void InterInstruction::PerformTempForwarding(TempForwardingTable& forwardingTabl
}
}
// True when `code` is IC_CALL, IC_CALL_NATIVE or IC_ASSEMBLER, the codes
// treated here as having side effects; false for every other code.
bool HasSideEffect(InterCode code)
{
	if (code == IC_CALL)
		return true;
	if (code == IC_CALL_NATIVE)
		return true;
	return code == IC_ASSEMBLER;
}
// True unless `code` is flagged by HasSideEffect or is one of the
// copy/store, branch, frame push/pop or return codes listed below.
bool IsMoveable(InterCode code)
{
	// Side-effecting codes plus memory writes, branches and frame handling
	const bool fixedInPlace =
		HasSideEffect(code) ||
		code == IC_COPY || code == IC_STRCPY || code == IC_STORE ||
		code == IC_BRANCH ||
		code == IC_POP_FRAME || code == IC_PUSH_FRAME;

	// Any flavour of function return
	const bool leavesFunction =
		code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE;

	return !(fixedInPlace || leavesFunction);
}
bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps)
{
bool changed = false;
@ -6920,27 +6926,29 @@ bool InterCodeBasicBlock::MergeIndexedLoadStore(const GrowingInstructionPtrArra
if (lins->mSrc[0].mTemp >= 0)
{
InterInstruction* bins = lins;
for (int j = 0; j < ltvalue.Size(); j++)
if (lins->mSrc[1].mMemory != IM_ABSOLUTE || (lins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && lins->mSrc[0].mRange.mMaxValue >= 256))
{
InterInstruction* cins = ltvalue[j];
if (cins &&
cins->mSrc[0].mTemp == bins->mSrc[0].mTemp &&
cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 &&
cins->mSrc[1].mMemory == bins->mSrc[1].mMemory &&
cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex &&
cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst &&
cins->mSrc[1].mMemory != IM_ABSOLUTE)
InterInstruction* bins = lins;
for (int j = 0; j < ltvalue.Size(); j++)
{
InterInstruction* cins = ltvalue[j];
if (cins &&
cins->mSrc[0].mTemp == bins->mSrc[0].mTemp &&
cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 &&
cins->mSrc[1].mMemory == bins->mSrc[1].mMemory &&
cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex &&
cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst)
{
bins = cins;
}
}
bins = cins;
}
if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252)
{
ins->mSrc[pi].mTemp = bins->mDst.mTemp;
ins->mSrc[pi].mIntConst += lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst;
changed = true;
if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252)
{
ins->mSrc[pi].mTemp = bins->mDst.mTemp;
ins->mSrc[pi].mIntConst += lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst;
changed = true;
}
}
}
}
@ -10568,6 +10576,18 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 0]->mSrc[0].mIntConst = ~((1LL << shift) - 1);
changed = true;
}
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_SHL && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_MUL && mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
(mInstructions[i + 1]->mSrc[0].mIntConst << mInstructions[i + 0]->mSrc[0].mIntConst) < 65536)
{
mInstructions[i + 1]->mSrc[0].mIntConst <<= mInstructions[i + 0]->mSrc[0].mIntConst;;
mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1];
mInstructions[i + 0]->mCode = IC_NONE;
mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
#if 1
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_OR && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
@ -10808,6 +10828,15 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst;
changed = true;
}
else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_STORE && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
mInstructions[i + 1]->mSrc[1].mIntConst != 0)
{
mInstructions[i + 0]->mSrc[1].mIntConst += mInstructions[i + 1]->mSrc[1].mIntConst;
mInstructions[i + 1]->mSrc[1].mIntConst = 0;
changed = true;
}
#if 1
else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mMemory == IM_GLOBAL &&
@ -10830,6 +10859,22 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
changed = true;
}
#endif
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD &&
mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_SHL &&
mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
mInstructions[i + 2]->mCode == IC_LEA &&
mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal &&
mInstructions[i + 2]->mSrc[1].mTemp < 0)
{
mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1];
mInstructions[i + 2]->mSrc[1].mIntConst += mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst;
mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
#if 1
// Postincrement artifact
@ -12047,6 +12092,8 @@ void InterCodeProcedure::Close(void)
EliminateAliasValues();
MergeIndexedLoadStore();
#if 1
ResetVisited();
mEntryBlock->RestartLocalIntegerRangeSets(mLocalVars);

View File

@ -2726,7 +2726,7 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT
#endif
#if 1
if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y))
if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y) && !(mFlags & NICT_INDEXFLIPPED))
{
mMode = ASMIM_ABSOLUTE_Y;
changed = true;
@ -20740,6 +20740,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
{
assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_X));
mIns[j].mMode = ASMIM_ABSOLUTE_X;
mIns[j].mFlags |= NICT_INDEXFLIPPED;
n = j;
changed = true;
}
@ -20762,6 +20763,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
{
assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_Y));
mIns[j].mMode = ASMIM_ABSOLUTE_Y;
mIns[j].mFlags |= NICT_INDEXFLIPPED;
n = j;
changed = true;
}
@ -21376,7 +21378,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}
else if (mIns[i].mType == ASMIT_SEC && mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C))
else if (mIns[i].mType == ASMIT_SEC && mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C))
{
mIns[i + 0].mType = ASMIT_CLC;
mIns[i + 1].mAddress++;
@ -21401,7 +21403,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 &&
mIns[i + 1].mType == ASMIT_CMP && !(mIns[i + 1].mLive & (LIVE_CPU_REG_C | LIVE_CPU_REG_A)))
mIns[i + 1].mType == ASMIT_CMP && !(mIns[i + 1].mLive & (LIVE_CPU_REG_C | LIVE_CPU_REG_A)))
{
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
mIns[i + 1].mType = ASMIT_LDA;
@ -21491,6 +21493,36 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 1].mType = ASMIT_STA;
progress = true;
}
#if 1
else if (mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_STX)
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_X;
mIns[i + 1] = ins;
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_TYA && mIns[i + 1].mType == ASMIT_STY)
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_Y;
mIns[i + 1] = ins;
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_TAX && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresXReg())
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A;
mIns[i + 1] = ins;
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_TAY && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresYReg())
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A;
mIns[i + 1] = ins;
progress = true;
}
#endif
else if (
mIns[i + 0].mType == ASMIT_ROL && mIns[i + 0].mMode == ASMIM_IMPLIED &&
mIns[i + 1].mType == ASMIT_LSR && mIns[i + 1].mMode == ASMIM_IMPLIED && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
@ -23871,6 +23903,43 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true;
}
}
#if 1
else if (
mIns[i + 0].mType == ASMIT_LDA &&
mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 2].mType == ASMIT_LDA &&
mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress != mIns[i + 1].mAddress &&
mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && mIns[i + 4].mAddress == mIns[i + 1].mAddress &&
!mIns[i + 0].ReferencesZeroPage(mIns[i + 3].mAddress) &&
!mIns[i + 2].ReferencesZeroPage(mIns[i + 1].mAddress) &&
!(mIns[i + 0].mFlags & NCIF_VOLATILE) && !(mIns[i + 2].mFlags & NCIF_VOLATILE))
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 2];
mIns[i + 2] = ins;
mIns[i + 1].mAddress = mIns[i + 3].mAddress;
mIns[i + 3].mAddress = mIns[i + 4].mAddress;
if (mIns[i + 2].RequiresYReg())
{
mIns[i + 0].mLive |= LIVE_CPU_REG_Y;
mIns[i + 1].mLive |= LIVE_CPU_REG_Y;
}
if (mIns[i + 2].RequiresXReg())
{
mIns[i + 0].mLive |= LIVE_CPU_REG_X;
mIns[i + 1].mLive |= LIVE_CPU_REG_X;
}
mIns[i + 0].mLive |= mIns[i + 2].mLive;
mIns[i + 2].mLive |= mIns[i + 4].mLive;
mIns[i + 3].mLive |= mIns[i + 4].mLive;
mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED;
progress = true;
}
#endif
}
#endif
CheckLive();

View File

@ -64,6 +64,7 @@ static const uint32 NCIF_VOLATILE = 0x00000010;
static const uint32 NCIF_LONG = 0x00000020;
static const uint32 NCIF_FEXEC = 0x00000040;
static const uint32 NCIF_JSRFLAGS = 0x00000080;
static const uint32 NICT_INDEXFLIPPED = 0x00000100;
static const uint32 NCIF_USE_CPU_REG_A = 0x00001000;
static const uint32 NCIF_USE_CPU_REG_X = 0x00002000;

View File

@ -93,6 +93,7 @@
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;version.lib;%(AdditionalDependencies)</AdditionalDependencies>
<StackReserveSize>16000000</StackReserveSize>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">

View File

@ -14,12 +14,12 @@ void copyFont(void)
mmap_set(MMAP_ROM);
}
// Screen and color space
#define screen ((byte *)0x0400)
#define color ((byte *)0xd800)
// Single row of screen has 40 characters
typedef char ScreenRow[40];
// Macro for easy access to screen space
#define sline(x, y) (screen + 40 * (y) + (x))
// Screen and color space
ScreenRow * const screen = (ScreenRow *)0x0400;
ScreenRow * const color = (ScreenRow *)0xd800;
// Start row for text
#define srow 5
@ -30,12 +30,12 @@ void scrollLeft(void)
// Loop horizontally
for(char x=0; x<39; x++)
{
// Unroll vertical loop 16 times
#assign y 0
#repeat
sline(0, srow + y)[x] = sline(1, srow + y)[x];
#assign y y + 1
#until y == 16
// Unroll vertical loop 16 times
#pragma unroll(full)
for(char y=0; y<16; y++)
{
screen[srow + y][x] = screen[srow + y][x + 1];
}
}
}
@ -46,13 +46,13 @@ void expand(char c, byte f)
byte * fp = font + 8 * c;
// Unroll eight times for each byte in glyph data
#assign y 0
#repeat
sline(39, srow + 2 * y + 0)[0] =
sline(39, srow + 2 * y + 1)[0] = (fp[y] & f) ? 160 : 32;
#assign y y + 1
#until y == 8
// #pragma unroll(full)
for(char y=0; y<8; y++)
{
char t = (fp[y] & f) ? 160 : 32;
screen[srow + 2 * y + 0][39] = t;
screen[srow + 2 * y + 1][39] = t;
}
}
const char * text =
@ -77,7 +77,7 @@ int main(void)
// Color bars
for(int i=0; i<16; i++)
memset(color + 40 * (srow + i), i + 1, 40);
memset(color[srow + i], i + 1, 40);
vic.color_back = VCOL_BLACK;
vic.color_border = VCOL_BLACK;