Byte code generator optimizations

This commit is contained in:
drmortalwombat 2021-11-06 14:29:12 +01:00
parent a8ed15d67b
commit 417f65e2c2
6 changed files with 429 additions and 32 deletions

View File

@ -126,6 +126,69 @@ bool ngez(int a)
// Byte code variant of the unsigned "a == 0" test (its result is printed
// under the BYTE label by cmpuz and compared against nequz).
// The comparison is spelled out literally so the compiler under test has to
// generate code for an unsigned compare against the constant zero.
bool bequz(unsigned a)
{
return a == 0;
}
// Byte code variant of the unsigned "a < 0" test.
// An unsigned value is never below zero, so this always yields false; the
// tautology is deliberate so the compiler under test has to handle (or fold)
// an unsigned less-than against zero.
bool bltuz(unsigned a)
{
return a < 0;
}
// Byte code variant of the unsigned "a > 0" test.
// For an unsigned operand this is the same as "a != 0"; it is kept in the
// relational form so that form is what the compiler under test sees.
bool bgtuz(unsigned a)
{
return a > 0;
}
// Byte code variant of the unsigned "a <= 0" test.
// For an unsigned operand this is the same as "a == 0"; it is kept in the
// relational form so that form is what the compiler under test sees.
bool bleuz(unsigned a)
{
return a <= 0;
}
// Byte code variant of the unsigned "a >= 0" test.
// An unsigned value is always at least zero, so this always yields true; the
// tautology is deliberate so the compiler under test has to handle (or fold)
// an unsigned greater-or-equal against zero.
bool bgeuz(unsigned a)
{
return a >= 0;
}
// Native counterpart of bequz: the same unsigned "a == 0" test, with the
// function named in a #pragma native directive (presumably selecting native
// code generation; cmpuz prints its result under the NATIVE label).
bool nequz(unsigned a)
{
return a == 0;
}
#pragma native(nequz)
// Native counterpart of bltuz: the unsigned "a < 0" test, which is always
// false. The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz prints it under the NATIVE label).
bool nltuz(unsigned a)
{
return a < 0;
}
#pragma native(nltuz)
// Native counterpart of bgtuz: the unsigned "a > 0" test, equivalent to
// "a != 0". The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz prints it under the NATIVE label).
bool ngtuz(unsigned a)
{
return a > 0;
}
#pragma native(ngtuz)
// Native counterpart of bleuz: the unsigned "a <= 0" test, equivalent to
// "a == 0". The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz compares it against bleuz).
bool nleuz(unsigned a)
{
return a <= 0;
}
#pragma native(nleuz)
// Native counterpart of bgeuz: the unsigned "a >= 0" test, which is always
// true. The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz compares it against bgeuz).
bool ngeuz(unsigned a)
{
return a >= 0;
}
#pragma native(ngeuz)
bool beq1(int a)
{
return a == 1;
@ -218,6 +281,21 @@ void cmpz(int a)
assert(bgef == ngef);
}
// Runs every unsigned compare-against-zero helper on `a`, once through the
// b* functions and once through the n* (#pragma native) functions, prints the
// EQ/LT/GT results of both sets and asserts that each pair agrees.
void cmpuz(unsigned a)
{
	// Results of the b* helpers, evaluated in the order EQ, LT, GT, LE, GE
	bool beqf = bequz(a);
	bool bltf = bltuz(a);
	bool bgtf = bgtuz(a);
	bool blef = bleuz(a);
	bool bgef = bgeuz(a);

	// Results of the n* helpers, same order
	bool neqf = nequz(a);
	bool nltf = nltuz(a);
	bool ngtf = ngtuz(a);
	bool nlef = nleuz(a);
	bool ngef = ngeuz(a);

	// Only EQ, LT and GT are printed; LE and GE are still checked below
	printf("BYTE %u, 0 : EQ %u LT %d GT %u\r", a, beqf, bltf, bgtf);
	printf("NATIVE %u, 0 : EQ %u LT %d GT %u\r", a, neqf, nltf, ngtf);

	// Both sets must produce the same answer for every relation
	assert(beqf == neqf);
	assert(bltf == nltf);
	assert(bgtf == ngtf);
	assert(blef == nlef);
	assert(bgef == ngef);
}
void cmp1(int a)
{
bool beqf = beq1(a), bltf = blt1(a), bgtf = bgt1(a), blef = ble1(a), bgef = bge1(a);
@ -316,6 +394,17 @@ int main(void)
cmpz(-10000);
cmpz(-20000);
cmpuz(0);
cmpuz(1);
cmpuz(255);
cmpuz(256);
cmpuz(10000);
cmpuz(20000);
cmpuz(40000);
cmpuz(32767);
cmpuz(32768);
cmpuz(65535);
cmp1(0);
cmp1(1);
cmp1(2);

View File

@ -2,6 +2,7 @@
#include <assert.h>
typedef signed char int8;
typedef unsigned char uint8;
bool beq(int8 a, int8 b)
{
@ -128,6 +129,73 @@ bool ngez(int8 a)
// Byte code variant of the 8 bit unsigned "a == 0" test (its result is
// printed under the BYTE label by cmpuz and compared against nequz).
// The comparison is spelled out literally so the compiler under test has to
// generate code for an unsigned byte compare against the constant zero.
bool bequz(uint8 a)
{
return a == 0;
}
// Byte code variant of the 8 bit unsigned "a < 0" test.
// A uint8 value is never negative, so this always yields false; the tautology
// is deliberate so the compiler under test has to handle (or fold) an
// unsigned byte less-than against zero.
bool bltuz(uint8 a)
{
return a < 0;
}
// Byte code variant of the 8 bit unsigned "a > 0" test.
// For a uint8 operand this is the same as "a != 0"; it is kept in the
// relational form so that form is what the compiler under test sees.
bool bgtuz(uint8 a)
{
return a > 0;
}
// Byte code variant of the 8 bit unsigned "a <= 0" test.
// For a uint8 operand this is the same as "a == 0"; it is kept in the
// relational form so that form is what the compiler under test sees.
bool bleuz(uint8 a)
{
return a <= 0;
}
// Byte code variant of the 8 bit unsigned "a >= 0" test.
// A uint8 value is never negative, so this always yields true; the tautology
// is deliberate so the compiler under test has to handle (or fold) an
// unsigned byte greater-or-equal against zero.
bool bgeuz(uint8 a)
{
return a >= 0;
}
// Native counterpart of bequz: the same uint8 "a == 0" test, with the
// function named in a #pragma native directive (presumably selecting native
// code generation; cmpuz prints its result under the NATIVE label).
bool nequz(uint8 a)
{
return a == 0;
}
#pragma native(nequz)
// Native counterpart of bltuz: the uint8 "a < 0" test, which is always
// false. The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz prints it under the NATIVE label).
bool nltuz(uint8 a)
{
return a < 0;
}
#pragma native(nltuz)
// Native counterpart of bgtuz: the uint8 "a > 0" test, equivalent to
// "a != 0". The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz prints it under the NATIVE label).
bool ngtuz(uint8 a)
{
return a > 0;
}
#pragma native(ngtuz)
// Native counterpart of bleuz: the uint8 "a <= 0" test, equivalent to
// "a == 0". The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz compares it against bleuz).
bool nleuz(uint8 a)
{
return a <= 0;
}
#pragma native(nleuz)
// Native counterpart of bgeuz: the uint8 "a >= 0" test, which is always
// true. The function is named in a #pragma native directive (presumably
// selecting native code generation; cmpuz compares it against bgeuz).
bool ngeuz(uint8 a)
{
return a >= 0;
}
#pragma native(ngeuz)
bool beq1(int8 a)
{
return a == 1;
@ -220,6 +288,21 @@ void cmpz(int8 a)
assert(bgef == ngef);
}
// Runs every uint8 compare-against-zero helper on `a`, once through the b*
// functions and once through the n* (#pragma native) functions, prints the
// EQ/LT/GT results of both sets and asserts that each pair agrees.
void cmpuz(uint8 a)
{
	// Results of the b* helpers, evaluated in the order EQ, LT, GT, LE, GE
	bool beqf = bequz(a);
	bool bltf = bltuz(a);
	bool bgtf = bgtuz(a);
	bool blef = bleuz(a);
	bool bgef = bgeuz(a);

	// Results of the n* helpers, same order
	bool neqf = nequz(a);
	bool nltf = nltuz(a);
	bool ngtf = ngtuz(a);
	bool nlef = nleuz(a);
	bool ngef = ngeuz(a);

	// Only EQ, LT and GT are printed; LE and GE are still checked below
	printf("BYTE %d, 0 : EQ %d LT %d GT %d\r", a, beqf, bltf, bgtf);
	printf("NATIVE %d, 0 : EQ %d LT %d GT %d\r", a, neqf, nltf, ngtf);

	// Both sets must produce the same answer for every relation
	assert(beqf == neqf);
	assert(bltf == nltf);
	assert(bgtf == ngtf);
	assert(blef == nlef);
	assert(bgef == ngef);
}
void cmp1(int8 a)
{
bool beqf = beq1(a), bltf = blt1(a), bgtf = bgt1(a), blef = ble1(a), bgef = bge1(a);
@ -413,6 +496,12 @@ int main(void)
cmpz(-1);
cmpz(-128);
cmpuz(0);
cmpuz(1);
cmpuz(127);
cmpuz(128);
cmpuz(255);
cmp1(0);
cmp1(1);
cmp1(2);

View File

@ -245,6 +245,11 @@ bool ByteCodeInstruction::ChangesAddr(void) const
return ChangesRegister(BC_REG_ADDR);
}
bool ByteCodeInstruction::UsesAddr(void) const
{
return UsesRegister(BC_REG_ADDR);
}
bool ByteCodeInstruction::LoadsRegister(uint32 reg) const
{
if (mRegister == reg)
@ -392,10 +397,10 @@ bool ByteCodeInstruction::UsesRegister(uint32 reg) const
if (reg == BC_REG_ADDR)
{
if (mCode >= BC_LOAD_ADDR_8 && mCode <= BC_LOAD_ADDR_32)
if (mCode >= BC_LOAD_ADDR_8 && mCode <= BC_STORE_ADDR_32)
return true;
if (mCode >= BC_LOAD_ADDR_8 && mCode <= BC_STORE_ADDR_32)
if (mCode == BC_COPY || mCode == BC_STRCPY)
return true;
if (mCode == BC_JSR || mCode == BC_CALL)
@ -2685,12 +2690,31 @@ ByteCode ByteCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const
mIns.Push(cins);
}
else
{
if (ins->mSrc[1].mIntConst == 0)
{
switch (ins->mOperator)
{
case IA_CMPEQ:
case IA_CMPGEU:
return BC_BRANCHS_EQ;
case IA_CMPNE:
case IA_CMPLU:
return BC_BRANCHS_NE;
case IA_CMPLEU:
return BC_JUMPS;
case IA_CMPGU:
return BC_NOP;
}
}
else
{
ByteCodeInstruction cins(BC_BINOP_CMPUI_8);
cins.mValue = ins->mSrc[1].mIntConst;
mIns.Push(cins);
}
}
}
else if (ins->mSrc[0].mTemp < 0)
{
ByteCodeInstruction lins(BC_LOAD_REG_8);
@ -2698,17 +2722,61 @@ ByteCode ByteCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const
lins.mRegisterFinal = ins->mSrc[1].mFinal;
mIns.Push(lins);
if (csigned)
{
if (ins->mSrc[0].mIntConst == 0)
{
ByteCodeInstruction cins(BC_CONV_I8_I16);
cins.mRegister = BC_REG_ACCU;
mIns.Push(cins);
switch (ins->mOperator)
{
case IA_CMPEQ:
return BC_BRANCHS_EQ;
case IA_CMPNE:
return BC_BRANCHS_NE;
case IA_CMPLES:
return BC_BRANCHS_LE;
case IA_CMPGS:
return BC_BRANCHS_GT;
case IA_CMPGES:
return BC_BRANCHS_GE;
case IA_CMPLS:
return BC_BRANCHS_LT;
}
}
else
{
ByteCodeInstruction cins(BC_BINOP_CMPSI_8);
cins.mValue = ins->mSrc[0].mIntConst;
mIns.Push(cins);
}
}
else
{
if (ins->mSrc[0].mIntConst == 0)
{
switch (ins->mOperator)
{
case IA_CMPEQ:
case IA_CMPLEU:
return BC_BRANCHS_EQ;
case IA_CMPNE:
case IA_CMPGU:
return BC_BRANCHS_NE;
case IA_CMPGEU:
return BC_JUMPS;
case IA_CMPLU:
return BC_NOP;
}
}
else
{
ByteCodeInstruction cins(BC_BINOP_CMPUI_8);
cins.mValue = ins->mSrc[0].mIntConst;
mIns.Push(cins);
}
}
code = TransposeBranchCondition(code);
}
else
@ -2761,17 +2829,57 @@ ByteCode ByteCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const
lins.mRegisterFinal = ins->mSrc[1].mFinal;
mIns.Push(lins);
if (csigned)
{
if (ins->mSrc[0].mIntConst == 0)
{
switch (ins->mOperator)
{
case IA_CMPEQ:
return BC_BRANCHS_EQ;
case IA_CMPNE:
return BC_BRANCHS_NE;
case IA_CMPLES:
return BC_BRANCHS_LE;
case IA_CMPGS:
return BC_BRANCHS_GT;
case IA_CMPGES:
return BC_BRANCHS_GE;
case IA_CMPLS:
return BC_BRANCHS_LT;
}
}
else
{
ByteCodeInstruction cins(BC_BINOP_CMPSI_16);
cins.mValue = ins->mSrc[0].mIntConst;
mIns.Push(cins);
}
}
else
{
if (ins->mSrc[0].mIntConst == 0)
{
switch (ins->mOperator)
{
case IA_CMPEQ:
case IA_CMPLEU:
return BC_BRANCHS_EQ;
case IA_CMPNE:
case IA_CMPGU:
return BC_BRANCHS_NE;
case IA_CMPGEU:
return BC_JUMPS;
case IA_CMPLU:
return BC_NOP;
}
}
else
{
ByteCodeInstruction cins(BC_BINOP_CMPUI_16);
cins.mValue = ins->mSrc[0].mIntConst;
mIns.Push(cins);
}
}
code = TransposeBranchCondition(code);
}
else
@ -3629,8 +3737,19 @@ void ByteCodeBasicBlock::Compile(InterCodeProcedure* iproc, ByteCodeProcedure* p
else
{
ByteCode code = RelationalOperator(iproc, ins);
if (code == BC_JUMPS)
{
IntConstToAccu(1);
}
else if (code == BC_NOP)
{
IntConstToAccu(0);
}
else
{
ByteCodeInstruction bins(ByteCode(code - BC_BRANCHS_EQ + BC_SET_EQ));
mIns.Push(bins);
}
ByteCodeInstruction sins(StoreTypedTmpCodes[ins->mDst.mType]);
sins.mRegister = BC_REG_TMP + iproc->mTempOffset[ins->mDst.mTemp];
mIns.Push(sins);
@ -3669,7 +3788,7 @@ void ByteCodeBasicBlock::Compile(InterCodeProcedure* iproc, ByteCodeProcedure* p
lins.mRegister = BC_REG_TMP + iproc->mTempOffset[ins->mSrc[0].mTemp];
lins.mRegisterFinal = ins->mSrc[0].mFinal;
mIns.Push(lins);
ByteCodeInstruction sins(BC_STORE_REG_16);
ByteCodeInstruction sins(InterTypeSize[ins->mDst.mType] == 1 ? BC_STORE_REG_8 : BC_STORE_REG_16);
sins.mRegister = BC_REG_TMP + iproc->mTempOffset[ins->mDst.mTemp];
mIns.Push(sins);
}
@ -3777,7 +3896,7 @@ bool ByteCodeBasicBlock::JoinTailCodeSequences(void)
return changed;
}
bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
bool ByteCodeBasicBlock::PeepHoleOptimizer(int phase)
{
bool changed = false;
@ -3807,6 +3926,44 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
}
mIns.SetSize(j);
// check reg addr up
//
#if 1
for (int i = 2; i < mIns.Size(); i++)
{
if (mIns[i].mCode == BC_ADDR_REG && mIns[i].mRegister != BC_REG_ACCU && mIns[i].mRegisterFinal)
{
int j = i;
while (j > 0 && !mIns[j - 1].ChangesAddr() && !mIns[j - 1].UsesAddr() && !mIns[j - 1].ChangesRegister(mIns[j].mRegister) && !mIns[j - 1].UsesRegister(mIns[j].mRegister))
{
ByteCodeInstruction bins = mIns[j - 1];
mIns[j - 1] = mIns[j];
mIns[j] = bins;
j--;
}
}
}
#endif
#if 1
if (phase == 2)
{
for (int i = 2; i < mIns.Size(); i++)
{
if (mIns[i].mCode >= BC_LOAD_ADDR_8 && mIns[i].mCode <= BC_STORE_ADDR_32)
{
int j = i;
while (j > 0 && !mIns[j - 1].ChangesAddr() && !mIns[j - 1].ChangesRegister(mIns[j].mRegister) && !mIns[j - 1].UsesRegister(mIns[j].mRegister))
{
ByteCodeInstruction bins = mIns[j - 1];
mIns[j - 1] = mIns[j];
mIns[j] = bins;
j--;
}
}
}
}
#endif
// mark accu live
uint32 live = mExitLive;
@ -3913,7 +4070,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
progress = true;
}
else if (mIns[i].mCode == BC_STORE_REG_16 &&
!mIns[i + 1].ChangesAddr() && mIns[i + 1].mRegister != mIns[i].mRegister &&
!mIns[i + 1].ChangesAddr() && !mIns[i + 1].UsesAddr() && mIns[i + 1].mRegister != mIns[i].mRegister &&
mIns[i + 2].mCode == BC_ADDR_REG && mIns[i].mRegister == mIns[i + 2].mRegister && mIns[i + 2].mRegisterFinal)
{
mIns[i].mCode = BC_ADDR_REG;
@ -4052,6 +4209,18 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
mIns[i + 1].mRegister = mIns[i + 0].mRegister;
progress = true;
}
#endif
#if 1
else if (
mIns[i + 0].mCode == BC_STORE_REG_16 &&
mIns[i + 1].mCode == BC_LOAD_REG_16 &&
mIns[i + 2].mCode == BC_BINOP_SUBR_16 && mIns[i].mRegister == mIns[i + 2].mRegister && mIns[i + 2].mRegisterFinal)
{
mIns[i + 0].mCode = BC_OP_NEGATE_16;
mIns[i + 1].mCode = BC_BINOP_ADDR_16;
mIns[i + 2].mCode = BC_NOP;
progress = true;
}
#endif
else if (
mIns[i + 0].mCode == BC_LEA_ABS && mIns[i + 0].mRegister == BC_REG_ACCU &&
@ -4175,6 +4344,12 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
mIns[i].mCode = BC_NOP;
progress = true;
}
else if (mIns[i].mCode == BC_LOAD_REG_16 && mIns[i + 1].mCode == BC_LOAD_REG_8 && mIns[i + 1].mRegister == BC_REG_ACCU)
{
mIns[i].mCode = BC_LOAD_REG_8;
mIns[i + 1].mCode = BC_NOP;
progress = true;
}
else if ((mIns[i].mCode == BC_CONST_16 || mIns[i].mCode == BC_CONST_P8 || mIns[i].mCode == BC_CONST_N8) &&
(mIns[i + 1].mCode == BC_CONST_16 || mIns[i + 1].mCode == BC_CONST_P8 || mIns[i + 1].mCode == BC_CONST_N8 || mIns[i + 1].mCode == BC_CONST_32) && mIns[i].mRegister == mIns[i + 1].mRegister)
{
@ -4207,6 +4382,14 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
mIns[i + 1].mCode = BC_NOP;
progress = true;
}
if ((mIns[i].mCode == BC_LOAD_ABS_U8 || mIns[i].mCode == BC_LOAD_ADDR_U8 || mIns[i].mCode == BC_LOAD_ABS_16 || mIns[i].mCode == BC_LOAD_ADDR_16) && mIns[i].mRegister == BC_REG_ACCU &&
mIns[i + 1].mCode == BC_STORE_REG_16 && !(mIns[i + 1].mLive & LIVE_ACCU))
{
mIns[i].mRegister = mIns[i + 1].mRegister;
mIns[i + 1].mCode = BC_NOP;
progress = true;
}
#endif
#if 0
else if ((mIns[i].mCode == BC_LOAD_LOCAL_16 || mIns[i].mCode == BC_LOAD_ABS_16) && mIns[i + 1].mCode == BC_ADDR_REG && mIns[i].mRegister == mIns[i + 1].mRegister && mIns[i + 1].mRegisterFinal)
@ -4227,6 +4410,14 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
#endif
if (mIns[i].mCode == BC_ADDR_REG && mIns[i].mRegister == addrTemp)
{
if (mIns[i].mRegisterFinal)
{
int j = i;
while (j > 0 && !mIns[j - 1].UsesRegister(mIns[i].mRegister) && !mIns[j - 1].ChangesRegister(mIns[i].mRegister))
j--;
if (j > 0 && mIns[j - 1].UsesRegister(mIns[i].mRegister))
mIns[j - 1].mRegisterFinal = true;
}
mIns[i].mCode = BC_NOP;
progress = true;
}
@ -4300,9 +4491,9 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void)
changed = true;
} while (progress);
if (mTrueJump && mTrueJump->PeepHoleOptimizer())
if (mTrueJump && mTrueJump->PeepHoleOptimizer(phase))
changed = true;
if (mFalseJump && mFalseJump->PeepHoleOptimizer())
if (mFalseJump && mFalseJump->PeepHoleOptimizer(phase))
changed = true;
}
@ -4328,11 +4519,26 @@ void ByteCodeBasicBlock::Assemble(ByteCodeGenerator* generator)
}
// Terminates this block by recording its successors and branch code.
//
//   trueJump  - successor taken when the branch condition holds
//   falseJump - successor taken when it does not
//   branch    - branch byte code; two values are treated as an already
//               decided condition and turn the block into an unconditional
//               jump with a single successor:
//                 BC_NOP   -> always continue at falseJump
//                 BC_JUMPS -> always continue at trueJump
//
// In every case the jump and link pointers are set to the same blocks.
void ByteCodeBasicBlock::Close(ByteCodeBasicBlock* trueJump, ByteCodeBasicBlock* falseJump, ByteCode branch)
{
	const bool neverTaken = (branch == BC_NOP);
	const bool alwaysTaken = (branch == BC_JUMPS);

	if (neverTaken || alwaysTaken)
	{
		// Statically decided: keep only the successor that is actually reached
		ByteCodeBasicBlock* onlyTarget = neverTaken ? falseJump : trueJump;

		mTrueJump = mTrueLink = onlyTarget;
		mFalseJump = mFalseLink = nullptr;
		mBranch = BC_JUMPS;
		return;
	}

	// Genuine conditional branch: keep both successors and the branch code
	mTrueJump = mTrueLink = trueJump;
	mFalseJump = mFalseLink = falseJump;
	mBranch = branch;
}
static int BranchByteSize(int from, int to)
{
@ -4595,12 +4801,14 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure
#if 1
bool progress = false;
int phase = 0;
do {
progress = false;
ResetVisited();
progress = entryBlock->PeepHoleOptimizer();
progress = entryBlock->PeepHoleOptimizer(phase);
ResetVisited();
for (int i = 0; i < mBlocks.Size(); i++)
@ -4611,6 +4819,12 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure
if (entryBlock->JoinTailCodeSequences())
progress = true;
if (!progress && phase < 4)
{
phase++;
progress = true;
}
} while (progress);
#endif

View File

@ -209,6 +209,7 @@ public:
bool ChangesRegister(uint32 reg) const;
bool UsesAccu(void) const;
bool UsesAddr(void) const;
bool UsesRegister(uint32 reg) const;
bool LoadsRegister(uint32 reg) const;
@ -290,7 +291,7 @@ public:
bool JoinTailCodeSequences(void);
bool SameTail(ByteCodeInstruction& ins);
bool PeepHoleOptimizer(void);
bool PeepHoleOptimizer(int phase);
};
class ByteCodeGenerator;

View File

@ -175,9 +175,9 @@ bool Compiler::GenerateCode(void)
mGlobalAnalyzer->mCompilerOptions = mCompilerOptions;
mGlobalAnalyzer->AnalyzeAssembler(dcrtstart->mValue, nullptr);
mGlobalAnalyzer->DumpCallGraph();
// mGlobalAnalyzer->DumpCallGraph();
mGlobalAnalyzer->AutoInline();
mGlobalAnalyzer->DumpCallGraph();
// mGlobalAnalyzer->DumpCallGraph();
mInterCodeGenerator->mCompilerOptions = mCompilerOptions;
mNativeCodeGenerator->mCompilerOptions = mCompilerOptions;

View File

@ -81,7 +81,9 @@ void GlobalAnalyzer::AutoInline(void)
if (doinline)
{
#if 0
printf("INLINING %s %d * (%d - 1)\n", f->mIdent->mString, cost, f->mCallers.Size());
#endif
f->mFlags |= DTF_INLINE;
for (int j = 0; j < f->mCallers.Size(); j++)
{
@ -130,7 +132,9 @@ void GlobalAnalyzer::AutoInline(void)
if (nparams <= 8)
{
f->mBase->mFlags |= DTF_FASTCALL;
#if 0
printf("FASTCALL %s\n", f->mIdent->mString);
#endif
}
}