Fix reordering of function calls

This commit is contained in:
drmortalwombat 2022-06-30 20:28:36 +02:00
parent fdcaf54666
commit ba661759fb
5 changed files with 175 additions and 57 deletions

View File

@ -869,6 +869,22 @@ void ValueSet::InsertValue(InterInstruction * ins)
mInstructions[mNum++] = ins; mInstructions[mNum++] = ins;
} }
// Returns true when the instruction code is one of IC_CALL, IC_CALL_NATIVE
// or IC_ASSEMBLER, and false for every other code.
static bool HasSideEffect(InterCode code)
{
	if (code == IC_CALL)
		return true;
	if (code == IC_CALL_NATIVE)
		return true;
	return code == IC_ASSEMBLER;
}
// Returns false for codes reported by HasSideEffect, for copy / string copy /
// store, for branch, for frame push / pop and for the three return variants;
// returns true for every other instruction code.
static bool IsMoveable(InterCode code)
{
	const bool pinned =
		HasSideEffect(code) ||
		code == IC_COPY || code == IC_STRCPY || code == IC_STORE ||
		code == IC_BRANCH ||
		code == IC_POP_FRAME || code == IC_PUSH_FRAME ||
		code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE;

	return !pinned;
}
static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction* bins) static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction* bins)
{ {
// Check ambiguity // Check ambiguity
@ -922,6 +938,9 @@ static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction*
static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins) static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins)
{ {
if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode))
return false;
if (lins->mDst.mTemp >= 0) if (lins->mDst.mTemp >= 0)
{ {
if (lins->mDst.mTemp == bins->mDst.mTemp) if (lins->mDst.mTemp == bins->mDst.mTemp)
@ -948,6 +967,9 @@ static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins
static bool CanBypassUp(const InterInstruction* lins, const InterInstruction* bins) static bool CanBypassUp(const InterInstruction* lins, const InterInstruction* bins)
{ {
if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode))
return false;
if (lins->mDst.mTemp >= 0) if (lins->mDst.mTemp >= 0)
{ {
if (lins->mDst.mTemp == bins->mDst.mTemp) if (lins->mDst.mTemp == bins->mDst.mTemp)
@ -2757,22 +2779,6 @@ void InterInstruction::PerformTempForwarding(TempForwardingTable& forwardingTabl
} }
} }
// True for the two call codes (IC_CALL, IC_CALL_NATIVE) and for IC_ASSEMBLER;
// false for any other instruction code.
bool HasSideEffect(InterCode code)
{
	const bool isCall = (code == IC_CALL) || (code == IC_CALL_NATIVE);
	return isCall || code == IC_ASSEMBLER;
}
// Returns true unless the code is flagged by HasSideEffect or is one of the
// copy / store, branch, frame push / pop or return codes listed below.
bool IsMoveable(InterCode code)
{
	if (HasSideEffect(code))
		return false;
	if (code == IC_COPY || code == IC_STRCPY || code == IC_STORE)
		return false;
	if (code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME)
		return false;
	return !(code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE);
}
bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps) bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps)
{ {
bool changed = false; bool changed = false;
@ -6919,9 +6925,10 @@ bool InterCodeBasicBlock::MergeIndexedLoadStore(const GrowingInstructionPtrArra
InterInstruction* lins = ltvalue[ins->mSrc[pi].mTemp]; InterInstruction* lins = ltvalue[ins->mSrc[pi].mTemp];
if (lins->mSrc[0].mTemp >= 0) if (lins->mSrc[0].mTemp >= 0)
{
if (lins->mSrc[1].mMemory != IM_ABSOLUTE || (lins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && lins->mSrc[0].mRange.mMaxValue >= 256))
{ {
InterInstruction* bins = lins; InterInstruction* bins = lins;
for (int j = 0; j < ltvalue.Size(); j++) for (int j = 0; j < ltvalue.Size(); j++)
{ {
InterInstruction* cins = ltvalue[j]; InterInstruction* cins = ltvalue[j];
@ -6930,11 +6937,11 @@ bool InterCodeBasicBlock::MergeIndexedLoadStore(const GrowingInstructionPtrArra
cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 && cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 &&
cins->mSrc[1].mMemory == bins->mSrc[1].mMemory && cins->mSrc[1].mMemory == bins->mSrc[1].mMemory &&
cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex && cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex &&
cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst && cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst)
cins->mSrc[1].mMemory != IM_ABSOLUTE) {
bins = cins; bins = cins;
} }
}
if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252) if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252)
{ {
@ -6945,6 +6952,7 @@ bool InterCodeBasicBlock::MergeIndexedLoadStore(const GrowingInstructionPtrArra
} }
} }
} }
}
int dtemp = ins->mDst.mTemp; int dtemp = ins->mDst.mTemp;
@ -10568,6 +10576,18 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 0]->mSrc[0].mIntConst = ~((1LL << shift) - 1); mInstructions[i + 0]->mSrc[0].mIntConst = ~((1LL << shift) - 1);
changed = true; changed = true;
} }
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_SHL && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_MUL && mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
(mInstructions[i + 1]->mSrc[0].mIntConst << mInstructions[i + 0]->mSrc[0].mIntConst) < 65536)
{
mInstructions[i + 1]->mSrc[0].mIntConst <<= mInstructions[i + 0]->mSrc[0].mIntConst;;
mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1];
mInstructions[i + 0]->mCode = IC_NONE;
mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
#if 1 #if 1
else if ( else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_OR && mInstructions[i + 0]->mSrc[0].mTemp < 0 && mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_OR && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
@ -10808,6 +10828,15 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst; mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst;
changed = true; changed = true;
} }
else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_STORE && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
mInstructions[i + 1]->mSrc[1].mIntConst != 0)
{
mInstructions[i + 0]->mSrc[1].mIntConst += mInstructions[i + 1]->mSrc[1].mIntConst;
mInstructions[i + 1]->mSrc[1].mIntConst = 0;
changed = true;
}
#if 1 #if 1
else if ( else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mMemory == IM_GLOBAL && mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mMemory == IM_GLOBAL &&
@ -10830,6 +10859,22 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
changed = true; changed = true;
} }
#endif #endif
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD &&
mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_SHL &&
mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
mInstructions[i + 2]->mCode == IC_LEA &&
mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal &&
mInstructions[i + 2]->mSrc[1].mTemp < 0)
{
mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1];
mInstructions[i + 2]->mSrc[1].mIntConst += mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst;
mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
#if 1 #if 1
// Postincrement artifact // Postincrement artifact
@ -12047,6 +12092,8 @@ void InterCodeProcedure::Close(void)
EliminateAliasValues(); EliminateAliasValues();
MergeIndexedLoadStore();
#if 1 #if 1
ResetVisited(); ResetVisited();
mEntryBlock->RestartLocalIntegerRangeSets(mLocalVars); mEntryBlock->RestartLocalIntegerRangeSets(mLocalVars);

View File

@ -2726,7 +2726,7 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT
#endif #endif
#if 1 #if 1
if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y)) if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y) && !(mFlags & NICT_INDEXFLIPPED))
{ {
mMode = ASMIM_ABSOLUTE_Y; mMode = ASMIM_ABSOLUTE_Y;
changed = true; changed = true;
@ -20740,6 +20740,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
{ {
assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_X)); assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_X));
mIns[j].mMode = ASMIM_ABSOLUTE_X; mIns[j].mMode = ASMIM_ABSOLUTE_X;
mIns[j].mFlags |= NICT_INDEXFLIPPED;
n = j; n = j;
changed = true; changed = true;
} }
@ -20762,6 +20763,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
{ {
assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_Y)); assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_Y));
mIns[j].mMode = ASMIM_ABSOLUTE_Y; mIns[j].mMode = ASMIM_ABSOLUTE_Y;
mIns[j].mFlags |= NICT_INDEXFLIPPED;
n = j; n = j;
changed = true; changed = true;
} }
@ -21491,6 +21493,36 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 1].mType = ASMIT_STA; mIns[i + 1].mType = ASMIT_STA;
progress = true; progress = true;
} }
#if 1
else if (mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_STX)
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_X;
mIns[i + 1] = ins;
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_TYA && mIns[i + 1].mType == ASMIT_STY)
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_Y;
mIns[i + 1] = ins;
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_TAX && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresXReg())
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A;
mIns[i + 1] = ins;
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_TAY && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresYReg())
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A;
mIns[i + 1] = ins;
progress = true;
}
#endif
else if ( else if (
mIns[i + 0].mType == ASMIT_ROL && mIns[i + 0].mMode == ASMIM_IMPLIED && mIns[i + 0].mType == ASMIT_ROL && mIns[i + 0].mMode == ASMIM_IMPLIED &&
mIns[i + 1].mType == ASMIT_LSR && mIns[i + 1].mMode == ASMIM_IMPLIED && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) mIns[i + 1].mType == ASMIT_LSR && mIns[i + 1].mMode == ASMIM_IMPLIED && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
@ -23871,6 +23903,43 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true; progress = true;
} }
} }
#if 1
else if (
mIns[i + 0].mType == ASMIT_LDA &&
mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 2].mType == ASMIT_LDA &&
mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress != mIns[i + 1].mAddress &&
mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && mIns[i + 4].mAddress == mIns[i + 1].mAddress &&
!mIns[i + 0].ReferencesZeroPage(mIns[i + 3].mAddress) &&
!mIns[i + 2].ReferencesZeroPage(mIns[i + 1].mAddress) &&
!(mIns[i + 0].mFlags & NCIF_VOLATILE) && !(mIns[i + 2].mFlags & NCIF_VOLATILE))
{
NativeCodeInstruction ins(mIns[i + 0]);
mIns[i + 0] = mIns[i + 2];
mIns[i + 2] = ins;
mIns[i + 1].mAddress = mIns[i + 3].mAddress;
mIns[i + 3].mAddress = mIns[i + 4].mAddress;
if (mIns[i + 2].RequiresYReg())
{
mIns[i + 0].mLive |= LIVE_CPU_REG_Y;
mIns[i + 1].mLive |= LIVE_CPU_REG_Y;
}
if (mIns[i + 2].RequiresXReg())
{
mIns[i + 0].mLive |= LIVE_CPU_REG_X;
mIns[i + 1].mLive |= LIVE_CPU_REG_X;
}
mIns[i + 0].mLive |= mIns[i + 2].mLive;
mIns[i + 2].mLive |= mIns[i + 4].mLive;
mIns[i + 3].mLive |= mIns[i + 4].mLive;
mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED;
progress = true;
}
#endif
} }
#endif #endif
CheckLive(); CheckLive();

View File

@ -64,6 +64,7 @@ static const uint32 NCIF_VOLATILE = 0x00000010;
static const uint32 NCIF_LONG = 0x00000020; static const uint32 NCIF_LONG = 0x00000020;
static const uint32 NCIF_FEXEC = 0x00000040; static const uint32 NCIF_FEXEC = 0x00000040;
static const uint32 NCIF_JSRFLAGS = 0x00000080; static const uint32 NCIF_JSRFLAGS = 0x00000080;
static const uint32 NICT_INDEXFLIPPED = 0x00000100;
static const uint32 NCIF_USE_CPU_REG_A = 0x00001000; static const uint32 NCIF_USE_CPU_REG_A = 0x00001000;
static const uint32 NCIF_USE_CPU_REG_X = 0x00002000; static const uint32 NCIF_USE_CPU_REG_X = 0x00002000;

View File

@ -93,6 +93,7 @@
<SubSystem>Console</SubSystem> <SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;version.lib;%(AdditionalDependencies)</AdditionalDependencies> <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;version.lib;%(AdditionalDependencies)</AdditionalDependencies>
<StackReserveSize>16000000</StackReserveSize>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">

View File

@ -14,12 +14,12 @@ void copyFont(void)
mmap_set(MMAP_ROM); mmap_set(MMAP_ROM);
} }
// Screen and color space // Single row of screen has 40 characters
#define screen ((byte *)0x0400) typedef char ScreenRow[40];
#define color ((byte *)0xd800)
// Macro for easy access to screen space // Screen and color space
#define sline(x, y) (screen + 40 * (y) + (x)) ScreenRow * const screen = (ScreenRow *)0x0400;
ScreenRow * const color = (ScreenRow *)0xd800;
// Start row for text // Start row for text
#define srow 5 #define srow 5
@ -30,12 +30,12 @@ void scrollLeft(void)
// Loop horizontally // Loop horizontally
for(char x=0; x<39; x++) for(char x=0; x<39; x++)
{ {
// Unroll vetical loop 16 times // Unroll vertical loop 16 times
#assign y 0 #pragma unroll(full)
#repeat for(char y=0; y<16; y++)
sline(0, srow + y)[x] = sline(1, srow + y)[x]; {
#assign y y + 1 screen[srow + y][x] = screen[srow + y][x + 1];
#until y == 16 }
} }
} }
@ -46,13 +46,13 @@ void expand(char c, byte f)
byte * fp = font + 8 * c; byte * fp = font + 8 * c;
// Unroll eight times for each byte in glyph data // Unroll eight times for each byte in glyph data
#assign y 0 // #pragma unroll(full)
#repeat for(char y=0; y<8; y++)
sline(39, srow + 2 * y + 0)[0] = {
sline(39, srow + 2 * y + 1)[0] = (fp[y] & f) ? 160 : 32; char t = (fp[y] & f) ? 160 : 32;
#assign y y + 1 screen[srow + 2 * y + 0][39] = t;
#until y == 8 screen[srow + 2 * y + 1][39] = t;
}
} }
const char * text = const char * text =
@ -77,7 +77,7 @@ int main(void)
// Color bars // Color bars
for(int i=0; i<16; i++) for(int i=0; i<16; i++)
memset(color + 40 * (srow + i), i + 1, 40); memset(color[srow + i], i + 1, 40);
vic.color_back = VCOL_BLACK; vic.color_back = VCOL_BLACK;
vic.color_border = VCOL_BLACK; vic.color_border = VCOL_BLACK;