Optimize single conditional path value evaluation

This commit is contained in:
drmortalwombat 2021-11-09 22:24:29 +01:00
parent 7dddcc772b
commit cb5f5f421f
4 changed files with 244 additions and 25 deletions

View File

@ -74,6 +74,85 @@ unsigned shr8n(int n)
#pragma native(shr8n) #pragma native(shr8n)
// Checks left shifts of an unsigned and a signed 8-bit value.
// Fills ua/ia with xu << s and xi << s for s = 0..15 via the
// #assign/#repeat/#until directives, then asserts that every entry equals
// the same shift computed with the loop variable i, cast to the element type.
// NOTE(review): presumably #repeat is expanded by the preprocessor so each s
// is a constant shift count, while the loop uses a variable count - confirm.
// No #pragma native(shl8xb) is visible in this chunk, unlike shl8xn.
void shl8xb(unsigned char xu, signed char xi)
{
unsigned char ua[16];
signed char ia[16];
// Reference values, one store per shift count s = 0..15.
#assign s 0
#repeat
ua[s] = xu << s;
ia[s] = xi << s;
#assign s s + 1
#until s == 16
// Compare against the shift by the loop variable.
for(int i=0; i<16; i++)
{
assert(ua[i] == (unsigned char)(xu << i));
assert(ia[i] == (signed char)(xi << i));
}
}
// Checks right shifts of an unsigned and a signed 8-bit value.
// Fills ua/ia with xu >> s and xi >> s for s = 0..15 via the
// #assign/#repeat/#until directives, then asserts that every entry equals
// the same shift computed with the loop variable i, cast to the element type.
// NOTE(review): presumably #repeat is expanded by the preprocessor so each s
// is a constant shift count, while the loop uses a variable count - confirm.
// No #pragma native(shr8xb) is visible in this chunk, unlike shr8xn.
void shr8xb(unsigned char xu, signed char xi)
{
unsigned char ua[16];
signed char ia[16];
// Reference values, one store per shift count s = 0..15.
#assign s 0
#repeat
ua[s] = xu >> s;
ia[s] = xi >> s;
#assign s s + 1
#until s == 16
// Compare against the shift by the loop variable.
for(int i=0; i<16; i++)
{
assert(ua[i] == (unsigned char)(xu >> i));
assert(ia[i] == (signed char)(xi >> i));
}
}
// Same left-shift check as shl8xb (the body is identical); this variant is
// the one named by the #pragma native(shl8xn) that follows the definitions.
// Fills ua/ia with xu << s and xi << s for s = 0..15 via the
// #assign/#repeat/#until directives, then asserts that every entry equals
// the same shift computed with the loop variable i, cast to the element type.
// NOTE(review): presumably #repeat is expanded by the preprocessor so each s
// is a constant shift count, while the loop uses a variable count - confirm.
void shl8xn(unsigned char xu, signed char xi)
{
unsigned char ua[16];
signed char ia[16];
// Reference values, one store per shift count s = 0..15.
#assign s 0
#repeat
ua[s] = xu << s;
ia[s] = xi << s;
#assign s s + 1
#until s == 16
// Compare against the shift by the loop variable.
for(int i=0; i<16; i++)
{
assert(ua[i] == (unsigned char)(xu << i));
assert(ia[i] == (signed char)(xi << i));
}
}
// Same right-shift check as shr8xb (the body is identical); this variant is
// the one named by the #pragma native(shr8xn) that follows the definitions.
// Fills ua/ia with xu >> s and xi >> s for s = 0..15 via the
// #assign/#repeat/#until directives, then asserts that every entry equals
// the same shift computed with the loop variable i, cast to the element type.
// NOTE(review): presumably #repeat is expanded by the preprocessor so each s
// is a constant shift count, while the loop uses a variable count - confirm.
void shr8xn(unsigned char xu, signed char xi)
{
unsigned char ua[16];
signed char ia[16];
// Reference values, one store per shift count s = 0..15.
#assign s 0
#repeat
ua[s] = xu >> s;
ia[s] = xi >> s;
#assign s s + 1
#until s == 16
// Compare against the shift by the loop variable.
for(int i=0; i<16; i++)
{
assert(ua[i] == (unsigned char)(xu >> i));
assert(ia[i] == (signed char)(xi >> i));
}
}
#pragma native(shl8xn)
#pragma native(shr8xn)
void shl16b(unsigned xu, int xi) void shl16b(unsigned xu, int xi)
{ {
unsigned ua[16]; unsigned ua[16];
@ -248,6 +327,27 @@ int main(void)
assert(shr8b(i) == shr8n(i)); assert(shr8b(i) == shr8n(i));
} }
shl8xb(0x00, 0x00);
shl8xb(0xff, 0xff);
shl8xb(0x34, 0x34);
shl8xb(0xdc, 0xdc);
shr8xb(0x00, 0x00);
shr8xb(0xff, 0xff);
shr8xb(0x34, 0x34);
shr8xb(0xdc, 0xdc);
shl8xn(0x00, 0x00);
shl8xn(0xff, 0xff);
shl8xn(0x34, 0x34);
shl8xn(0xdc, 0xdc);
shr8xn(0x00, 0x00);
shr8xn(0xff, 0xff);
shr8xn(0x34, 0x34);
shr8xn(0xdc, 0xdc);
shl16b(0x0000, 0x0000); shl16b(0x0000, 0x0000);
shl16b(0xffff, 0xffff); shl16b(0xffff, 0xffff);
shl16b(0x1234, 0x1234); shl16b(0x1234, 0x1234);

View File

@ -1736,6 +1736,17 @@ bool HasSideEffect(InterCode code)
return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER; return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER;
} }
// Tells whether an instruction with the given opcode may be relocated.
// Returns false for any opcode HasSideEffect() reports, and for the copy,
// string copy, store, branch, frame push/pop and return opcodes listed
// below; every other opcode yields true.
bool IsMoveable(InterCode code)
{
	if (HasSideEffect(code))
		return false;

	switch (code)
	{
	case IC_COPY:
	case IC_STRCPY:
	case IC_STORE:
	case IC_BRANCH:
	case IC_POP_FRAME:
	case IC_PUSH_FRAME:
	case IC_RETURN:
	case IC_RETURN_STRUCT:
	case IC_RETURN_VALUE:
		return false;

	default:
		return true;
	}
}
bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps) bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps)
{ {
bool changed = false; bool changed = false;
@ -4096,6 +4107,92 @@ void InterCodeBasicBlock::MarkRelevantStatics(void)
} }
} }
bool InterCodeBasicBlock::PushSinglePathResultInstructions(void)
{
int i;
bool changed = false;
if (!mVisited)
{
mVisited = true;
if (mTrueJump && mFalseJump)
{
NumberSet trueExitRequiredTemps(mTrueJump->mEntryRequiredTemps), falseExitRequiredTems(mFalseJump->mEntryRequiredTemps);
NumberSet providedTemps(mExitRequiredTemps.Size()), requiredTemps(mExitRequiredTemps.Size());
bool hadStore = false;
int i = mInstructions.Size();
while (i > 0)
{
i--;
InterInstruction* ins(mInstructions[i]);
bool moved = false;
if (ins->mDst.mTemp >= 0 && !providedTemps[ins->mDst.mTemp] && !requiredTemps[ins->mDst.mTemp])
{
int j = 0;
while (j < ins->mNumOperands && !(ins->mSrc[j].mTemp >= 0 && providedTemps[ins->mSrc[j].mTemp]))
j++;
if (j == ins->mNumOperands && IsMoveable(ins->mCode) && (ins->mCode != IC_LOAD || !hadStore))
{
if (mTrueJump->mNumEntries == 1 && trueExitRequiredTemps[ins->mDst.mTemp] && !falseExitRequiredTems[ins->mDst.mTemp])
{
for (int j = 0; j < ins->mNumOperands; j++)
{
if (ins->mSrc[j].mTemp >= 0)
trueExitRequiredTemps += ins->mSrc[j].mTemp;
}
mTrueJump->mInstructions.Insert(0, ins);
mInstructions.Remove(i);
moved = true;
changed = true;
}
else if (mFalseJump->mNumEntries == 1 && !trueExitRequiredTemps[ins->mDst.mTemp] && falseExitRequiredTems[ins->mDst.mTemp])
{
for (int j = 0; j < ins->mNumOperands; j++)
{
if (ins->mSrc[j].mTemp >= 0)
falseExitRequiredTems += ins->mSrc[j].mTemp;
}
mFalseJump->mInstructions.Insert(0, ins);
mInstructions.Remove(i);
moved = true;
changed = true;
}
}
providedTemps += ins->mDst.mTemp;
}
if (!moved)
{
for (int j = 0; j < ins->mNumOperands; j++)
{
if (ins->mSrc[j].mTemp >= 0)
requiredTemps += ins->mSrc[j].mTemp;
}
}
if (HasSideEffect(ins->mCode))
hadStore = true;
}
}
if (mTrueJump && mTrueJump->PushSinglePathResultInstructions())
changed = true;
if (mFalseJump && mFalseJump->PushSinglePathResultInstructions())
changed = true;
}
return changed;
}
void InterCodeBasicBlock::RemoveNonRelevantStatics(void) void InterCodeBasicBlock::RemoveNonRelevantStatics(void)
{ {
int i; int i;
@ -4403,16 +4500,6 @@ InterCodeBasicBlock* InterCodeBasicBlock::PropagateDominator(InterCodeProcedure*
return mDominator ? mDominator : this; return mDominator ? mDominator : this;
} }
// Tells whether an instruction with the given opcode may be relocated.
// Returns false for any opcode HasSideEffect() reports, and for the copy,
// string copy, store, branch, frame push/pop and return opcodes listed
// below; every other opcode yields true.
bool IsMoveable(InterCode code)
{
	if (HasSideEffect(code))
		return false;

	switch (code)
	{
	case IC_COPY:
	case IC_STRCPY:
	case IC_STORE:
	case IC_BRANCH:
	case IC_POP_FRAME:
	case IC_PUSH_FRAME:
	case IC_RETURN:
	case IC_RETURN_STRUCT:
	case IC_RETURN_VALUE:
		return false;

	default:
		return true;
	}
}
void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedParams) void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedParams)
{ {
if (!mVisited) if (!mVisited)
@ -5466,6 +5553,23 @@ void InterCodeProcedure::Close(void)
DisassembleDebug("Peephole optimized"); DisassembleDebug("Peephole optimized");
bool changed = false;
do
{
BuildDataFlowSets();
ResetVisited();
changed = mEntryBlock->PushSinglePathResultInstructions();
} while (changed);
BuildDataFlowSets();
TempForwarding();
RemoveUnusedInstructions();
DisassembleDebug("Moved single path instructions");
FastNumberSet activeSet(numTemps); FastNumberSet activeSet(numTemps);
// //

View File

@ -502,6 +502,8 @@ public:
void MarkRelevantStatics(void); void MarkRelevantStatics(void);
void RemoveNonRelevantStatics(void); void RemoveNonRelevantStatics(void);
bool PushSinglePathResultInstructions(void);
void PeepholeOptimization(void); void PeepholeOptimization(void);
void SingleBlockLoopOptimisation(const NumberSet& aliasedParams); void SingleBlockLoopOptimisation(const NumberSet& aliasedParams);

View File

@ -5732,6 +5732,9 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
} }
else else
{ {
NativeCodeBasicBlock* lblock = nproc->AllocateBlock();
NativeCodeBasicBlock* eblock = nproc->AllocateBlock();
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f));
mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED));
@ -5754,15 +5757,19 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
} }
mIns.Push(NativeCodeInstruction(ASMIT_CPX, ASMIM_IMMEDIATE, 0x00)); mIns.Push(NativeCodeInstruction(ASMIT_CPX, ASMIM_IMMEDIATE, 0x00));
mIns.Push(NativeCodeInstruction(ASMIT_BEQ, ASMIM_RELATIVE, 2 + 1 + 1 + 2)); this->Close(lblock, eblock, ASMIT_BNE);
mIns.Push(NativeCodeInstruction(ASMIT_LSR, ASMIM_IMPLIED)); lblock->mIns.Push(NativeCodeInstruction(ASMIT_LSR, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg)); lblock->mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg + 0));
mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED)); lblock->mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_BNE, ASMIM_RELATIVE, -(2 + 1 + 1 + 2))); lblock->Close(lblock, eblock, ASMIT_BNE);
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); eblock->mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
return eblock;
} }
} break; } break;
case IA_SAR: case IA_SAR:
{ {
@ -5858,11 +5865,13 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
for (int i = 0; i < shift; i++) for (int i = 0; i < shift; i++)
{ {
mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, 0x80)); mIns.Push(NativeCodeInstruction(ASMIT_LSR, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg)); mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg));
} }
mIns.Push(NativeCodeInstruction(ASMIT_EOR, ASMIM_IMMEDIATE, 0x80 >> shift));
mIns.Push(NativeCodeInstruction(ASMIT_SEC, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_IMMEDIATE, 0x80 >> shift));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
} }
} }
@ -5896,6 +5905,9 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
} }
else else
{ {
NativeCodeBasicBlock* lblock = nproc->AllocateBlock();
NativeCodeBasicBlock* eblock = nproc->AllocateBlock();
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f));
mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED));
@ -5918,15 +5930,16 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
} }
mIns.Push(NativeCodeInstruction(ASMIT_CPX, ASMIM_IMMEDIATE, 0x00)); mIns.Push(NativeCodeInstruction(ASMIT_CPX, ASMIM_IMMEDIATE, 0x00));
mIns.Push(NativeCodeInstruction(ASMIT_BEQ, ASMIM_RELATIVE, 2 + 1 + 2 + 1 + 2)); this->Close(lblock, eblock, ASMIT_BNE);
mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, 0x80)); lblock->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, 0x80));
mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED)); lblock->mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg)); lblock->mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg + 0));
mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED)); lblock->mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_BNE, ASMIM_RELATIVE, -(2 + 1 + 2 + 1 + 2))); lblock->Close(lblock, eblock, ASMIT_BNE);
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); eblock->mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
return eblock;
} }
} break; } break;