From 417f65e2c2009ff33a8638a4c7fdc837f6d9b93e Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 6 Nov 2021 14:29:12 +0100 Subject: [PATCH] Byte code generator optimizations --- autotest/testint16cmp.c | 89 +++++++++++ autotest/testint8cmp.c | 89 +++++++++++ oscar64/ByteCodeGenerator.cpp | 272 ++++++++++++++++++++++++++++++---- oscar64/ByteCodeGenerator.h | 3 +- oscar64/Compiler.cpp | 4 +- oscar64/GlobalAnalyzer.cpp | 4 + 6 files changed, 429 insertions(+), 32 deletions(-) diff --git a/autotest/testint16cmp.c b/autotest/testint16cmp.c index 25219a3..f6f116c 100644 --- a/autotest/testint16cmp.c +++ b/autotest/testint16cmp.c @@ -126,6 +126,69 @@ bool ngez(int a) +bool bequz(unsigned a) +{ + return a == 0; +} + +bool bltuz(unsigned a) +{ + return a < 0; +} + +bool bgtuz(unsigned a) +{ + return a > 0; +} + +bool bleuz(unsigned a) +{ + return a <= 0; +} + +bool bgeuz(unsigned a) +{ + return a >= 0; +} + +bool nequz(unsigned a) +{ + return a == 0; +} + +#pragma native(nequz) + +bool nltuz(unsigned a) +{ + return a < 0; +} + +#pragma native(nltuz) + +bool ngtuz(unsigned a) +{ + return a > 0; +} + +#pragma native(ngtuz) + +bool nleuz(unsigned a) +{ + return a <= 0; +} + +#pragma native(nleuz) + +bool ngeuz(unsigned a) +{ + return a >= 0; +} + +#pragma native(ngeuz) + + + + bool beq1(int a) { return a == 1; @@ -218,6 +281,21 @@ void cmpz(int a) assert(bgef == ngef); } +void cmpuz(unsigned a) +{ + bool beqf = bequz(a), bltf = bltuz(a), bgtf = bgtuz(a), blef = bleuz(a), bgef = bgeuz(a); + bool neqf = nequz(a), nltf = nltuz(a), ngtf = ngtuz(a), nlef = nleuz(a), ngef = ngeuz(a); + + printf("BYTE %u, 0 : EQ %u LT %d GT %u\r", a, beqf, bltf, bgtf); + printf("NATIVE %u, 0 : EQ %u LT %d GT %u\r", a, neqf, nltf, ngtf); + + assert(beqf == neqf); + assert(bltf == nltf); + assert(bgtf == ngtf); + assert(blef == nlef); + assert(bgef == ngef); +} + void cmp1(int a) { bool beqf = beq1(a), bltf = blt1(a), bgtf = bgt1(a), blef = ble1(a), bgef = bge1(a); @@ -316,6 +394,17 @@ int main(void) cmpz(-10000); cmpz(-20000); + cmpuz(0); + cmpuz(1); + cmpuz(255); + cmpuz(256); + cmpuz(10000); + cmpuz(20000); + cmpuz(40000); + cmpuz(32767); + cmpuz(32768); + cmpuz(65535); + cmp1(0); cmp1(1); cmp1(2); diff --git a/autotest/testint8cmp.c b/autotest/testint8cmp.c index f12d45d..b5a6744 100644 --- a/autotest/testint8cmp.c +++ b/autotest/testint8cmp.c @@ -2,6 +2,7 @@ #include typedef signed char int8; +typedef unsigned char uint8; bool beq(int8 a, int8 b) { @@ -128,6 +129,73 @@ bool ngez(int8 a) + + + +bool bequz(uint8 a) +{ + return a == 0; +} + +bool bltuz(uint8 a) +{ + return a < 0; +} + +bool bgtuz(uint8 a) +{ + return a > 0; +} + +bool bleuz(uint8 a) +{ + return a <= 0; +} + +bool bgeuz(uint8 a) +{ + return a >= 0; +} + +bool nequz(uint8 a) +{ + return a == 0; +} + +#pragma native(nequz) + +bool nltuz(uint8 a) +{ + return a < 0; +} + +#pragma native(nltuz) + +bool ngtuz(uint8 a) +{ + return a > 0; +} + +#pragma native(ngtuz) + +bool nleuz(uint8 a) +{ + return a <= 0; +} + +#pragma native(nleuz) + +bool ngeuz(uint8 a) +{ + return a >= 0; +} + +#pragma native(ngeuz) + + + + + bool beq1(int8 a) { return a == 1; @@ -220,6 +288,21 @@ void cmpz(int8 a) assert(bgef == ngef); } +void cmpuz(uint8 a) +{ + bool beqf = bequz(a), bltf = bltuz(a), bgtf = bgtuz(a), blef = bleuz(a), bgef = bgeuz(a); + bool neqf = nequz(a), nltf = nltuz(a), ngtf = ngtuz(a), nlef = nleuz(a), ngef = ngeuz(a); + + printf("BYTE %d, 0 : EQ %d LT %d GT %d\r", a, beqf, bltf, bgtf); + printf("NATIVE %d, 0 : EQ %d LT %d GT %d\r", a, neqf, nltf, ngtf); + + assert(beqf == neqf); + assert(bltf == nltf); + assert(bgtf == ngtf); + assert(blef == nlef); + assert(bgef == ngef); +} + void cmp1(int8 a) { bool beqf = beq1(a), bltf = blt1(a), bgtf = bgt1(a), blef = ble1(a), bgef = bge1(a); @@ -413,6 +496,12 @@ int main(void) cmpz(-1); cmpz(-128); + cmpuz(0); + cmpuz(1); + cmpuz(127); + cmpuz(128); + cmpuz(255); + cmp1(0); cmp1(1); cmp1(2); diff --git a/oscar64/ByteCodeGenerator.cpp b/oscar64/ByteCodeGenerator.cpp index 58140f4..6020d28 100644 --- a/oscar64/ByteCodeGenerator.cpp +++ b/oscar64/ByteCodeGenerator.cpp @@ -245,6 +245,11 @@ bool ByteCodeInstruction::ChangesAddr(void) const return ChangesRegister(BC_REG_ADDR); } +bool ByteCodeInstruction::UsesAddr(void) const +{ + return UsesRegister(BC_REG_ADDR); +} + bool ByteCodeInstruction::LoadsRegister(uint32 reg) const { if (mRegister == reg) @@ -392,10 +397,10 @@ bool ByteCodeInstruction::UsesRegister(uint32 reg) const if (reg == BC_REG_ADDR) { - if (mCode >= BC_LOAD_ADDR_8 && mCode <= BC_LOAD_ADDR_32) + if (mCode >= BC_LOAD_ADDR_8 && mCode <= BC_STORE_ADDR_32) return true; - if (mCode >= BC_LOAD_ADDR_8 && mCode <= BC_STORE_ADDR_32) + if (mCode == BC_COPY || mCode == BC_STRCPY) return true; if (mCode == BC_JSR || mCode == BC_CALL) @@ -2686,9 +2691,28 @@ ByteCode ByteCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const } else { - ByteCodeInstruction cins(BC_BINOP_CMPUI_8); - cins.mValue = ins->mSrc[1].mIntConst; - mIns.Push(cins); + if (ins->mSrc[1].mIntConst == 0) + { + switch (ins->mOperator) + { + case IA_CMPEQ: + case IA_CMPGEU: + return BC_BRANCHS_EQ; + case IA_CMPNE: + case IA_CMPLU: + return BC_BRANCHS_NE; + case IA_CMPLEU: + return BC_JUMPS; + case IA_CMPGU: + return BC_NOP; + } + } + else + { + ByteCodeInstruction cins(BC_BINOP_CMPUI_8); + cins.mValue = ins->mSrc[1].mIntConst; + mIns.Push(cins); + } } } else if (ins->mSrc[0].mTemp < 0) @@ -2699,15 +2723,59 @@ ByteCode ByteCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const mIns.Push(lins); if (csigned) { - ByteCodeInstruction cins(BC_BINOP_CMPSI_8); - cins.mValue = ins->mSrc[0].mIntConst; - mIns.Push(cins); + if (ins->mSrc[0].mIntConst == 0) + { + ByteCodeInstruction cins(BC_CONV_I8_I16); + cins.mRegister = BC_REG_ACCU; + mIns.Push(cins); + + switch (ins->mOperator) + { + case IA_CMPEQ: + return BC_BRANCHS_EQ; + case IA_CMPNE: + return BC_BRANCHS_NE; + case IA_CMPLES: + return BC_BRANCHS_LE; + case IA_CMPGS: + return BC_BRANCHS_GT; + case IA_CMPGES: + return BC_BRANCHS_GE; + case IA_CMPLS: + return BC_BRANCHS_LT; + } + } + else + { + ByteCodeInstruction cins(BC_BINOP_CMPSI_8); + cins.mValue = ins->mSrc[0].mIntConst; + mIns.Push(cins); + } } else { - ByteCodeInstruction cins(BC_BINOP_CMPUI_8); - cins.mValue = ins->mSrc[0].mIntConst; - mIns.Push(cins); + if (ins->mSrc[0].mIntConst == 0) + { + switch (ins->mOperator) + { + case IA_CMPEQ: + case IA_CMPLEU: + return BC_BRANCHS_EQ; + case IA_CMPNE: + case IA_CMPGU: + return BC_BRANCHS_NE; + case IA_CMPGEU: + return BC_JUMPS; + case IA_CMPLU: + return BC_NOP; + } + } + else + { + ByteCodeInstruction cins(BC_BINOP_CMPUI_8); + cins.mValue = ins->mSrc[0].mIntConst; + mIns.Push(cins); + } } code = TransposeBranchCondition(code); } @@ -2762,15 +2830,55 @@ ByteCode ByteCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const mIns.Push(lins); if (csigned) { - ByteCodeInstruction cins(BC_BINOP_CMPSI_16); - cins.mValue = ins->mSrc[0].mIntConst; - mIns.Push(cins); + if (ins->mSrc[0].mIntConst == 0) + { + switch (ins->mOperator) + { + case IA_CMPEQ: + return BC_BRANCHS_EQ; + case IA_CMPNE: + return BC_BRANCHS_NE; + case IA_CMPLES: + return BC_BRANCHS_LE; + case IA_CMPGS: + return BC_BRANCHS_GT; + case IA_CMPGES: + return BC_BRANCHS_GE; + case IA_CMPLS: + return BC_BRANCHS_LT; + } + } + else + { + ByteCodeInstruction cins(BC_BINOP_CMPSI_16); + cins.mValue = ins->mSrc[0].mIntConst; + mIns.Push(cins); + } } else { - ByteCodeInstruction cins(BC_BINOP_CMPUI_16); - cins.mValue = ins->mSrc[0].mIntConst; - mIns.Push(cins); + if (ins->mSrc[0].mIntConst == 0) + { + switch (ins->mOperator) + { + case IA_CMPEQ: + case IA_CMPLEU: + return BC_BRANCHS_EQ; + case IA_CMPNE: + case IA_CMPGU: + return BC_BRANCHS_NE; + case IA_CMPGEU: + return BC_JUMPS; + case IA_CMPLU: + return BC_NOP; + } + } + else + { + ByteCodeInstruction cins(BC_BINOP_CMPUI_16); + cins.mValue = ins->mSrc[0].mIntConst; + mIns.Push(cins); + } } code = TransposeBranchCondition(code); } @@ -3629,8 +3737,19 @@ void ByteCodeBasicBlock::Compile(InterCodeProcedure* iproc, ByteCodeProcedure* p else { ByteCode code = RelationalOperator(iproc, ins); - ByteCodeInstruction bins(ByteCode(code - BC_BRANCHS_EQ + BC_SET_EQ)); - mIns.Push(bins); + if (code == BC_JUMPS) + { + IntConstToAccu(1); + } + else if (code == BC_NOP) + { + IntConstToAccu(0); + } + else + { + ByteCodeInstruction bins(ByteCode(code - BC_BRANCHS_EQ + BC_SET_EQ)); + mIns.Push(bins); + } ByteCodeInstruction sins(StoreTypedTmpCodes[ins->mDst.mType]); sins.mRegister = BC_REG_TMP + iproc->mTempOffset[ins->mDst.mTemp]; mIns.Push(sins); @@ -3669,7 +3788,7 @@ void ByteCodeBasicBlock::Compile(InterCodeProcedure* iproc, ByteCodeProcedure* p lins.mRegister = BC_REG_TMP + iproc->mTempOffset[ins->mSrc[0].mTemp]; lins.mRegisterFinal = ins->mSrc[0].mFinal; mIns.Push(lins); - ByteCodeInstruction sins(BC_STORE_REG_16); + ByteCodeInstruction sins(InterTypeSize[ins->mDst.mType] == 1 ? BC_STORE_REG_8 : BC_STORE_REG_16); sins.mRegister = BC_REG_TMP + iproc->mTempOffset[ins->mDst.mTemp]; mIns.Push(sins); } @@ -3777,7 +3896,7 @@ bool ByteCodeBasicBlock::JoinTailCodeSequences(void) return changed; } -bool ByteCodeBasicBlock::PeepHoleOptimizer(void) +bool ByteCodeBasicBlock::PeepHoleOptimizer(int phase) { bool changed = false; @@ -3807,6 +3926,44 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) } mIns.SetSize(j); + // check reg addr up + // +#if 1 + for (int i = 2; i < mIns.Size(); i++) + { + if (mIns[i].mCode == BC_ADDR_REG && mIns[i].mRegister != BC_REG_ACCU && mIns[i].mRegisterFinal) + { + int j = i; + while (j > 0 && !mIns[j - 1].ChangesAddr() && !mIns[j - 1].UsesAddr() && !mIns[j - 1].ChangesRegister(mIns[j].mRegister) && !mIns[j - 1].UsesRegister(mIns[j].mRegister)) + { + ByteCodeInstruction bins = mIns[j - 1]; + mIns[j - 1] = mIns[j]; + mIns[j] = bins; + j--; + } + } + } +#endif +#if 1 + if (phase == 2) + { + for (int i = 2; i < mIns.Size(); i++) + { + if (mIns[i].mCode >= BC_LOAD_ADDR_8 && mIns[i].mCode <= BC_STORE_ADDR_32) + { + int j = i; + while (j > 0 && !mIns[j - 1].ChangesAddr() && !mIns[j - 1].ChangesRegister(mIns[j].mRegister) && !mIns[j - 1].UsesRegister(mIns[j].mRegister)) + { + ByteCodeInstruction bins = mIns[j - 1]; + mIns[j - 1] = mIns[j]; + mIns[j] = bins; + j--; + } + } + } + } +#endif + // mark accu live uint32 live = mExitLive; @@ -3913,7 +4070,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) progress = true; } else if (mIns[i].mCode == BC_STORE_REG_16 && - !mIns[i + 1].ChangesAddr() && mIns[i + 1].mRegister != mIns[i].mRegister && + !mIns[i + 1].ChangesAddr() && !mIns[i + 1].UsesAddr() && mIns[i + 1].mRegister != mIns[i].mRegister && mIns[i + 2].mCode == BC_ADDR_REG && mIns[i].mRegister == mIns[i + 2].mRegister && mIns[i + 2].mRegisterFinal) { mIns[i].mCode = BC_ADDR_REG; @@ -4052,6 +4209,18 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 1].mRegister = mIns[i + 0].mRegister; progress = true; } +#endif +#if 1 + else if ( + mIns[i + 0].mCode == BC_STORE_REG_16 && + mIns[i + 1].mCode == BC_LOAD_REG_16 && + mIns[i + 2].mCode == BC_BINOP_SUBR_16 && mIns[i].mRegister == mIns[i + 2].mRegister && mIns[i + 2].mRegisterFinal) + { + mIns[i + 0].mCode = BC_OP_NEGATE_16; + mIns[i + 1].mCode = BC_BINOP_ADDR_16; + mIns[i + 2].mCode = BC_NOP; + progress = true; + } #endif else if ( mIns[i + 0].mCode == BC_LEA_ABS && mIns[i + 0].mRegister == BC_REG_ACCU && @@ -4175,7 +4344,13 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i].mCode = BC_NOP; progress = true; } - else if ((mIns[i].mCode == BC_CONST_16 || mIns[i].mCode == BC_CONST_P8 || mIns[i].mCode == BC_CONST_N8) && + else if (mIns[i].mCode == BC_LOAD_REG_16 && mIns[i + 1].mCode == BC_LOAD_REG_8 && mIns[i + 1].mRegister == BC_REG_ACCU) + { + mIns[i].mCode = BC_LOAD_REG_8; + mIns[i + 1].mCode = BC_NOP; + progress = true; + } + else if ((mIns[i].mCode == BC_CONST_16 || mIns[i].mCode == BC_CONST_P8 || mIns[i].mCode == BC_CONST_N8) && (mIns[i + 1].mCode == BC_CONST_16 || mIns[i + 1].mCode == BC_CONST_P8 || mIns[i + 1].mCode == BC_CONST_N8 || mIns[i + 1].mCode == BC_CONST_32) && mIns[i].mRegister == mIns[i + 1].mRegister) { mIns[i].mCode = BC_NOP; @@ -4207,6 +4382,14 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 1].mCode = BC_NOP; progress = true; } + + if ((mIns[i].mCode == BC_LOAD_ABS_U8 || mIns[i].mCode == BC_LOAD_ADDR_U8 || mIns[i].mCode == BC_LOAD_ABS_16 || mIns[i].mCode == BC_LOAD_ADDR_16) && mIns[i].mRegister == BC_REG_ACCU && + mIns[i + 1].mCode == BC_STORE_REG_16 && !(mIns[i + 1].mLive & LIVE_ACCU)) + { + mIns[i].mRegister = mIns[i + 1].mRegister; + mIns[i + 1].mCode = BC_NOP; + progress = true; + } #endif #if 0 else if ((mIns[i].mCode == BC_LOAD_LOCAL_16 || mIns[i].mCode == BC_LOAD_ABS_16) && mIns[i + 1].mCode == BC_ADDR_REG && mIns[i].mRegister == mIns[i + 1].mRegister && mIns[i + 1].mRegisterFinal) @@ -4227,6 +4410,14 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) #endif if (mIns[i].mCode == BC_ADDR_REG && mIns[i].mRegister == addrTemp) { + if (mIns[i].mRegisterFinal) + { + int j = i; + while (j > 0 && !mIns[j - 1].UsesRegister(mIns[i].mRegister) && !mIns[j - 1].ChangesRegister(mIns[i].mRegister)) + j--; + if (j > 0 && mIns[j - 1].UsesRegister(mIns[i].mRegister)) + mIns[j - 1].mRegisterFinal = true; + } mIns[i].mCode = BC_NOP; progress = true; } @@ -4300,9 +4491,9 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) changed = true; } while (progress); - if (mTrueJump && mTrueJump->PeepHoleOptimizer()) + if (mTrueJump && mTrueJump->PeepHoleOptimizer(phase)) changed = true; - if (mFalseJump && mFalseJump->PeepHoleOptimizer()) + if (mFalseJump && mFalseJump->PeepHoleOptimizer(phase)) changed = true; } @@ -4329,9 +4520,24 @@ void ByteCodeBasicBlock::Assemble(ByteCodeGenerator* generator) void ByteCodeBasicBlock::Close(ByteCodeBasicBlock* trueJump, ByteCodeBasicBlock* falseJump, ByteCode branch) { - this->mTrueJump = this->mTrueLink = trueJump; - this->mFalseJump = this->mFalseLink = falseJump; - this->mBranch = branch; + if (branch == BC_NOP) + { + this->mTrueJump = this->mTrueLink = falseJump; + this->mFalseJump = this->mFalseLink = nullptr; + this->mBranch = BC_JUMPS; + } + else if (branch == BC_JUMPS) + { + this->mTrueJump = this->mTrueLink = trueJump; + this->mFalseJump = this->mFalseLink = nullptr; + this->mBranch = BC_JUMPS; + } + else + { + this->mTrueJump = this->mTrueLink = trueJump; + this->mFalseJump = this->mFalseLink = falseJump; + this->mBranch = branch; + } } static int BranchByteSize(int from, int to) @@ -4595,12 +4801,14 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure #if 1 bool progress = false; + int phase = 0; + do { progress = false; ResetVisited(); - progress = entryBlock->PeepHoleOptimizer(); + progress = entryBlock->PeepHoleOptimizer(phase); ResetVisited(); for (int i = 0; i < mBlocks.Size(); i++) @@ -4611,6 +4819,12 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure if (entryBlock->JoinTailCodeSequences()) progress = true; + if (!progress && phase < 4) + { + phase++; + progress = true; + } + } while (progress); #endif diff --git a/oscar64/ByteCodeGenerator.h b/oscar64/ByteCodeGenerator.h index 5fe40c5..48446ee 100644 --- a/oscar64/ByteCodeGenerator.h +++ b/oscar64/ByteCodeGenerator.h @@ -209,6 +209,7 @@ public: bool ChangesRegister(uint32 reg) const; bool UsesAccu(void) const; + bool UsesAddr(void) const; bool UsesRegister(uint32 reg) const; bool LoadsRegister(uint32 reg) const; @@ -290,7 +291,7 @@ public: bool JoinTailCodeSequences(void); bool SameTail(ByteCodeInstruction& ins); - bool PeepHoleOptimizer(void); + bool PeepHoleOptimizer(int phase); }; class ByteCodeGenerator; diff --git a/oscar64/Compiler.cpp b/oscar64/Compiler.cpp index cc29cbb..7ba3300 100644 --- a/oscar64/Compiler.cpp +++ b/oscar64/Compiler.cpp @@ -175,9 +175,9 @@ bool Compiler::GenerateCode(void) mGlobalAnalyzer->mCompilerOptions = mCompilerOptions; mGlobalAnalyzer->AnalyzeAssembler(dcrtstart->mValue, nullptr); - mGlobalAnalyzer->DumpCallGraph(); +// mGlobalAnalyzer->DumpCallGraph(); mGlobalAnalyzer->AutoInline(); - mGlobalAnalyzer->DumpCallGraph(); +// mGlobalAnalyzer->DumpCallGraph(); mInterCodeGenerator->mCompilerOptions = mCompilerOptions; mNativeCodeGenerator->mCompilerOptions = mCompilerOptions; diff --git a/oscar64/GlobalAnalyzer.cpp b/oscar64/GlobalAnalyzer.cpp index 41824d7..a7e7bce 100644 --- a/oscar64/GlobalAnalyzer.cpp +++ b/oscar64/GlobalAnalyzer.cpp @@ -81,7 +81,9 @@ void GlobalAnalyzer::AutoInline(void) if (doinline) { +#if 0 printf("INLINING %s %d * (%d - 1)\n", f->mIdent->mString, cost, f->mCallers.Size()); +#endif f->mFlags |= DTF_INLINE; for (int j = 0; j < f->mCallers.Size(); j++) { @@ -130,7 +132,9 @@ void GlobalAnalyzer::AutoInline(void) if (nparams <= 8) { f->mBase->mFlags |= DTF_FASTCALL; +#if 0 printf("FASTCALL %s\n", f->mIdent->mString); +#endif } }