From 0fae7abd72230151e880379a1a388a40f319406a Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 21 Jan 2023 22:41:13 +0100 Subject: [PATCH] Optimize simple inline assembler code --- autotest/asmtest.c | 8 +- autotest/floatcmptest.c | 39 ++- oscar64/Array.h | 20 ++ oscar64/InterCode.cpp | 144 +++++++++- oscar64/InterCode.h | 3 + oscar64/InterCodeGenerator.cpp | 17 +- oscar64/Linker.cpp | 12 + oscar64/Linker.h | 2 + oscar64/NativeCodeGenerator.cpp | 472 +++++++++++++++++++++++++++++--- oscar64/NativeCodeGenerator.h | 5 + oscar64/Parser.cpp | 18 ++ 11 files changed, 688 insertions(+), 52 deletions(-) diff --git a/autotest/asmtest.c b/autotest/asmtest.c index 8dc3f3b..7529549 100644 --- a/autotest/asmtest.c +++ b/autotest/asmtest.c @@ -3,7 +3,7 @@ int asum(int a, int b) { - __asm + return __asm { clc lda a @@ -12,14 +12,14 @@ int asum(int a, int b) lda a + 1 adc b + 1 sta accu + 1 - } + }; } int bsum(int a, int b) { puts("Hello\n"); - __asm + return __asm { clc lda a @@ -28,7 +28,7 @@ int bsum(int a, int b) lda a + 1 adc b + 1 sta accu + 1 - } + }; } int b, t[10]; diff --git a/autotest/floatcmptest.c b/autotest/floatcmptest.c index 3585cce..9afa51f 100644 --- a/autotest/floatcmptest.c +++ b/autotest/floatcmptest.c @@ -12,16 +12,50 @@ bool flt(float a, float b) return a < b; } +bool fle(float a, float b) +{ + return a <= b; +} + bool fgt(float a, float b) { return a > b; } -void cmpflt(float a, float b, bool eq, bool lt, bool gt) +bool fge(float a, float b) { + return a >= b; +} + + +volatile float f; + +inline void cmpflt(float a, float b, bool eq, bool lt, bool gt) +{ + bool le = eq || lt; + bool ge = eq || gt; + assert(feq(a, b) == eq); assert(flt(a, b) == lt); assert(fgt(a, b) == gt); + assert(fle(a, b) == le); + assert(fge(a, b) == ge); + + f = a; + + assert(feq(f, b) == eq); + assert(flt(f, b) == lt); + assert(fgt(f, b) == gt); + assert(fle(f, b) == le); + assert(fge(f, b) == ge); + + f = b; + + assert(feq(a, f) == eq); + assert(flt(a, f) == lt); + assert(fgt(a, f) == gt); + assert(fle(a, f) == le); + assert(fge(a, f) == ge); } int main(void) @@ -31,7 +65,7 @@ int main(void) cmpflt( 1.0, 0.0, false, false, true); cmpflt(-1.0, 0.0, false, true, false); - +#if 1 cmpflt( 1.0, 1.0, true, false, false); cmpflt( 1.0, 2.0, false, true, false); cmpflt( 2.0, 1.0, false, false, true); @@ -60,6 +94,7 @@ int main(void) cmpflt( -1.0, -1.000001, false, false, true); cmpflt( -1.000001, -1.0, false, true, false); cmpflt( -1.000001, -1.000001, true, false, false); +#endif return 0; } diff --git a/oscar64/Array.h b/oscar64/Array.h index f9151ef..84b3c5d 100644 --- a/oscar64/Array.h +++ b/oscar64/Array.h @@ -281,6 +281,26 @@ public: Grow(at, false); } + int RemoveAll(const T & t) + { + int j = 0, i = 0; + while (i < size) + { + if (array[i] != t) + { + if (i != j) + array[j] = array[i]; + j++; + } + i++; + } + + Grow(j, false); + + return i - j; + } + + T Top(void) const { return array[size - 1]; diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index de49eff..d2ede14 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -768,6 +768,63 @@ static double ConstantFolding(InterOperator oper, double val1, double val2 = 0.0 } } +InterOperator InvertRelational(InterOperator oper) +{ + switch (oper) + { + case IA_CMPGES: + return IA_CMPLS; + case IA_CMPLES: + return IA_CMPGS; + case IA_CMPGS: + return IA_CMPLES; + case IA_CMPLS: + return IA_CMPGES; + case IA_CMPGEU: + return IA_CMPLU; + case IA_CMPLEU: + return IA_CMPGU; + case IA_CMPGU: + return IA_CMPLEU; + case IA_CMPLU: + return IA_CMPGEU; + case IA_CMPEQ: + return IA_CMPNE; + case IA_CMPNE: + return IA_CMPEQ; + default: + return oper; + } + +} + +InterOperator MirrorRelational(InterOperator oper) +{ + switch (oper) + { + case IA_CMPGES: + return IA_CMPLES; + case IA_CMPLES: + return IA_CMPGES; + case IA_CMPGS: + return IA_CMPLS; + case IA_CMPLS: + return IA_CMPGS; + case IA_CMPGEU: + return IA_CMPLEU; + case IA_CMPLEU: + return IA_CMPGEU; + case IA_CMPGU: + return IA_CMPLU; + case IA_CMPLU: + return IA_CMPGU; + default: + return oper; + } + +} + + static void ConversionConstantFold(InterInstruction * ins, const InterOperand & cop) { switch (ins->mOperator) @@ -12389,6 +12446,90 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati changed = true; } #endif +#if 1 + else if ( + mInstructions[i + 0]->mCode == IC_RELATIONAL_OPERATOR && + mInstructions[i + 1]->mCode == IC_RELATIONAL_OPERATOR && + (mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal && mInstructions[i + 1]->mSrc[1].mTemp < 0 || + mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && mInstructions[i + 1]->mSrc[0].mTemp < 0) + ) + { + int v = mInstructions[i + 1]->mSrc[1].mIntConst; + InterOperator op = mInstructions[i + 1]->mOperator; + if (mInstructions[i + 1]->mSrc[1].mTemp >= 0) + { + v = mInstructions[i + 1]->mSrc[0].mIntConst; + op = MirrorRelational(op); + } + + bool flip = false, istrue = false, isfalse = true; + + switch (op) + { + case IA_CMPEQ: + flip = v == 0; + isfalse = (v != 0 && v != 1); + break; + case IA_CMPNE: + flip = v != 0; + istrue = (v != 0 && v != 1); + break; + case IA_CMPGEU: + case IA_CMPGES: + istrue = v <= 0; + isfalse = v > 1; + break; + case IA_CMPGU: + case IA_CMPGS: + istrue = v < 0; + isfalse = v >= 1; + break; + case IA_CMPLEU: + case IA_CMPLES: + flip = true; + isfalse = v < 0; + istrue = v >= 1; + break; + case IA_CMPLU: + case IA_CMPLS: + flip = true; + isfalse = v <= 0; + istrue = v > 1; + break; + } + + if (istrue) + { + mInstructions[i + 1]->mCode = IC_CONSTANT; + mInstructions[i + 1]->mConst.mType = IT_BOOL; + mInstructions[i + 1]->mConst.mIntConst = 1; + mInstructions[i + 1]->mSrc[0].mTemp = -1; + mInstructions[i + 1]->mSrc[0].mType = IT_NONE; + mInstructions[i + 1]->mSrc[1].mTemp = -1; + mInstructions[i + 1]->mSrc[1].mType = IT_NONE; + mInstructions[i + 1]->mNumOperands = 0; + } + else if (isfalse) + { + mInstructions[i + 1]->mCode = IC_CONSTANT; + mInstructions[i + 1]->mConst.mType = IT_BOOL; + mInstructions[i + 1]->mConst.mIntConst = 0; + mInstructions[i + 1]->mSrc[0].mTemp = -1; + mInstructions[i + 1]->mSrc[0].mType = IT_NONE; + mInstructions[i + 1]->mSrc[1].mTemp = -1; + mInstructions[i + 1]->mSrc[1].mType = IT_NONE; + mInstructions[i + 1]->mNumOperands = 0; + } + else + { + mInstructions[i + 0]->mDst = mInstructions[i + 1]->mDst; + mInstructions[i + 1]->mCode = IC_NONE; mInstructions[i + 1]->mNumOperands = 0; + if (flip) + mInstructions[i + 0]->mOperator = InvertRelational(mInstructions[i + 0]->mOperator); + } + changed = true; + } +#endif #if 1 else if ( mInstructions[i + 1]->mCode == IC_LOAD_TEMPORARY && mExitRequiredTemps[mInstructions[i + 1]->mDst.mTemp] && @@ -12413,6 +12554,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 1]->mSrc[0] = io; changed = true; } + else if ( mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD && mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal && @@ -13140,7 +13282,7 @@ InterCodeProcedure::InterCodeProcedure(InterCodeModule * mod, const Location & l mID = mModule->mProcedures.Size(); mModule->mProcedures.Push(this); mLinkerObject->mProc = this; - mCallerSavedTemps = 16; + mCallerSavedTemps = BC_REG_TMP_SAVED - BC_REG_TMP; } InterCodeProcedure::~InterCodeProcedure(void) diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index ae84f9e..4969af8 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -133,6 +133,9 @@ typedef GrowingArray GrowingVariableArray; #define INVALID_TEMPORARY (-1) +InterOperator MirrorRelational(InterOperator oper); +InterOperator InvertRelational(InterOperator oper); + class IntegerValueRange { public: diff --git a/oscar64/InterCodeGenerator.cpp b/oscar64/InterCodeGenerator.cpp index d0ab0b8..c250644 100644 --- a/oscar64/InterCodeGenerator.cpp +++ b/oscar64/InterCodeGenerator.cpp @@ -2545,6 +2545,9 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* block->Append(ins); InterInstruction * jins = new InterInstruction(exp->mLocation, IC_ASSEMBLER); + jins->mDst.mTemp = proc->AddTemporary(IT_INT32); + jins->mDst.mType = IT_INT32; + jins->mSrc[0].mType = IT_POINTER; jins->mSrc[0].mTemp = ins->mDst.mTemp; jins->mNumOperands = 1; @@ -2599,6 +2602,8 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* } block->Append(jins); + + return ExValue(exp->mDecType, jins->mDst.mTemp); } return ExValue(TheVoidTypeDeclaration); @@ -3586,9 +3591,15 @@ InterCodeProcedure* InterCodeGenerator::TranslateProcedure(InterCodeModule * mod { proc->mFastCallProcedure = true; - dec->mLinkerObject->mNumTemporaries = 1; - dec->mLinkerObject->mTemporaries[0] = BC_REG_FPARAMS; - dec->mLinkerObject->mTempSizes[0] = BC_REG_FPARAMS_END - BC_REG_FPARAMS; + if (dec->mFastCallSize > 0 && dec->mFastCallBase < BC_REG_FPARAMS_END - BC_REG_FPARAMS) + { + dec->mLinkerObject->mNumTemporaries = 1; + dec->mLinkerObject->mTemporaries[0] = BC_REG_FPARAMS + dec->mFastCallBase; + if (dec->mFastCallBase + dec->mFastCallBase < BC_REG_FPARAMS_END - BC_REG_FPARAMS) + dec->mLinkerObject->mTempSizes[0] = dec->mFastCallSize; + else + dec->mLinkerObject->mTempSizes[0] = BC_REG_FPARAMS_END - BC_REG_FPARAMS - dec->mFastCallBase; + } } if (dec->mBase->mBase->mType != DT_TYPE_VOID && dec->mBase->mBase->mType != DT_TYPE_STRUCT) diff --git a/oscar64/Linker.cpp b/oscar64/Linker.cpp index 9738a23..3f1942d 100644 --- a/oscar64/Linker.cpp +++ b/oscar64/Linker.cpp @@ -43,6 +43,18 @@ void LinkerObject::AddReference(const LinkerReference& ref) mReferences.Push(nref); } +LinkerReference* LinkerObject::FindReference(int offset) +{ + for (int i = 0; i < mReferences.Size(); i++) + { + if (mReferences[i]->mOffset == offset) + return mReferences[i]; + } + + return nullptr; +} + + void LinkerObject::MarkRelevant(void) { if (!(mFlags & LOBJF_RELEVANT)) diff --git a/oscar64/Linker.h b/oscar64/Linker.h index f831772..73168e8 100644 --- a/oscar64/Linker.h +++ b/oscar64/Linker.h @@ -178,6 +178,8 @@ public: GrowingArray mReferences; + LinkerReference* FindReference(int offset); + void AddReference(const LinkerReference& ref); void MoveToSection(LinkerSection* section); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index c0db621..f37bdac 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -89,6 +89,31 @@ void NativeRegisterDataSet::ResetMask(void) +void NativeRegisterDataSet::ResetWorkRegs(void) +{ + ResetZeroPage(BC_REG_WORK_Y); + ResetZeroPage(BC_REG_ADDR + 0); + ResetZeroPage(BC_REG_ADDR + 1); + + for (int i = 0; i < 4; i++) + ResetZeroPage(BC_REG_ACCU + i); + for (int i = 0; i < 8; i++) + ResetZeroPage(BC_REG_WORK + i); +} + +void NativeRegisterDataSet::ResetWorkMasks(void) +{ + mRegs[BC_REG_WORK_Y].ResetMask(); + mRegs[BC_REG_ADDR + 0].ResetMask(); + mRegs[BC_REG_ADDR + 1].ResetMask(); + + for (int i = 0; i < 4; i++) + mRegs[BC_REG_ACCU + i].ResetMask(); + for (int i = 0; i < 8; i++) + mRegs[BC_REG_WORK + i].ResetMask(); +} + + void NativeRegisterDataSet::ResetZeroPage(int addr) { mRegs[addr].Reset(); @@ -325,6 +350,12 @@ bool NativeCodeInstruction::IsUsedResultInstructions(NumberSet& requiredTemps) requiredTemps -= BC_REG_WORK + i; } + if (mFlags & NICF_USE_WORKREGS) + { + for (int i = 0; i < 10; i++) + requiredTemps += BC_REG_WORK + i; + } + requiredTemps += BC_REG_LOCALS; requiredTemps += BC_REG_LOCALS + 1; if (mLinkerObject) @@ -1124,13 +1155,13 @@ bool NativeCodeInstruction::UsesZeroPage(int address) const return true; else if (mType == ASMIT_JSR) { - if (address >= BC_REG_ACCU && address < BC_REG_ACCU + 4) - return true; - if (address >= BC_REG_WORK && address < BC_REG_WORK + 4) - return true; - if (mFlags & NCIF_RUNTIME) { + if (address >= BC_REG_ACCU && address < BC_REG_ACCU + 4) + return true; + + if (address >= BC_REG_WORK && address < BC_REG_WORK + 8) + return true; if (mFlags & NCIF_USE_ZP_32_X) { @@ -1146,6 +1177,12 @@ bool NativeCodeInstruction::UsesZeroPage(int address) const } else { + if (mFlags & NICF_USE_WORKREGS) + { + if (address >= BC_REG_WORK && address < BC_REG_WORK + 10) + return true; + } + if (address >= BC_REG_FPARAMS && address < BC_REG_FPARAMS_END) return true; @@ -1563,13 +1600,7 @@ void NativeCodeInstruction::Simulate(NativeRegisterDataSet& data) data.mRegs[CPU_REG_X].Reset(); data.mRegs[CPU_REG_Y].Reset(); - for (int i = 0; i < 4; i++) - { - data.mRegs[BC_REG_ACCU + i].Reset(); - data.mRegs[BC_REG_WORK + i].Reset(); - data.mRegs[BC_REG_ADDR + i].Reset(); - } - data.mRegs[BC_REG_WORK_Y].Reset(); + data.ResetWorkRegs(); if (mFlags & NCIF_FEXEC) { @@ -2233,13 +2264,7 @@ bool NativeCodeInstruction::BitFieldForwarding(NativeRegisterDataSet& data, AsmI data.mRegs[CPU_REG_X].ResetMask(); data.mRegs[CPU_REG_Y].ResetMask(); - for (int i = 0; i < 4; i++) - { - data.mRegs[BC_REG_ACCU + i].ResetMask(); - data.mRegs[BC_REG_WORK + i].ResetMask(); - data.mRegs[BC_REG_ADDR + i].ResetMask(); - } - data.mRegs[BC_REG_WORK_Y].ResetMask(); + data.ResetWorkMasks(); if (!(mFlags & NCIF_RUNTIME) || (mFlags & NCIF_FEXEC)) { @@ -2694,13 +2719,7 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT data.ResetIndirect(0); - for (int i = 0; i < 4; i++) - { - data.ResetZeroPage(BC_REG_ACCU + i); - data.ResetZeroPage(BC_REG_WORK + i); - data.ResetZeroPage(BC_REG_ADDR + i); - } - data.ResetZeroPage(BC_REG_WORK_Y); + data.ResetWorkRegs(); if (!(mFlags & NCIF_RUNTIME) || (mFlags & NCIF_FEXEC)) { @@ -3849,6 +3868,12 @@ void NativeCodeInstruction::FilterRegUsage(NumberSet& requiredTemps, NumberSet& requiredTemps += i; } } + else if (mFlags & NICF_USE_WORKREGS) + { + for (int i = 0; i < 10; i++) + if (!providedTemps[BC_REG_WORK + i]) + requiredTemps += BC_REG_WORK + i; + } else { if (mLinkerObject) @@ -3864,12 +3889,13 @@ void NativeCodeInstruction::FilterRegUsage(NumberSet& requiredTemps, NumberSet& } } #endif + providedTemps += BC_REG_ADDR + 0; + providedTemps += BC_REG_ADDR + 1; + for (int i = 0; i < 4; i++) - { providedTemps += BC_REG_ACCU + i; + for (int i = 0; i < 8; i++) providedTemps += BC_REG_WORK + i; - providedTemps += BC_REG_ADDR + i; - } providedTemps += CPU_REG_A; providedTemps += CPU_REG_X; @@ -10173,6 +10199,166 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In if (ins->mSrc[0].mType == IT_FLOAT) { + if (ins->mSrc[0].mTemp < 0 || ins->mSrc[1].mTemp < 0) + { + InterOperator op = ins->mOperator; + + int ci = 0, vi = 1; + if (ins->mSrc[1].mTemp < 0) + { + ci = 1; + vi = 0; + op = MirrorRelational(op); + } + + union { float f; unsigned int v; } cc; + cc.f = ins->mSrc[ci].mFloatConst; + + int ti = BC_REG_TMP + proc->mTempOffset[ins->mSrc[vi].mTemp]; + + if (cc.f == 0) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 3)); + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x7f)); + mIns.Push(NativeCodeInstruction(ASMIT_ORA, ASMIM_ZERO_PAGE, ti + 2)); + mIns.Push(NativeCodeInstruction(ASMIT_ORA, ASMIM_ZERO_PAGE, ti + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_ORA, ASMIM_ZERO_PAGE, ti + 0)); + + if (op == IA_CMPEQ) + { + Close(trueJump, falseJump, ASMIT_BEQ); + } + else if (op == IA_CMPNE) + { + Close(trueJump, falseJump, ASMIT_BNE); + } + else + { + NativeCodeBasicBlock* nblock = nproc->AllocateBlock(); + + if (op == IA_CMPGES || op == IA_CMPGEU || op == IA_CMPLES || op == IA_CMPLEU) + Close(trueJump, nblock, ASMIT_BEQ); + else + Close(falseJump, nblock, ASMIT_BEQ); + + nblock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 3)); + if (op == IA_CMPGES || op == IA_CMPGEU || op == IA_CMPGS || op == IA_CMPGU) + nblock->Close(trueJump, falseJump, ASMIT_BPL); + else + nblock->Close(trueJump, falseJump, ASMIT_BMI); + } + return; + } + else + { + NativeCodeBasicBlock* eblock1 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock2 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock3 = nproc->AllocateBlock(); + + + if (op == IA_CMPEQ) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 3)); + mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); + Close(eblock1, falseJump, ASMIT_BEQ); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 2)); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); + eblock1->Close(eblock2, falseJump, ASMIT_BEQ); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 1)); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); + eblock2->Close(eblock3, falseJump, ASMIT_BEQ); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 0)); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, cc.v & 0xff)); + eblock3->Close(trueJump, falseJump, ASMIT_BEQ); + return; + } + else if (op == IA_CMPNE) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 3)); + mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); + Close(eblock1, trueJump, ASMIT_BEQ); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 2)); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); + eblock1->Close(eblock2, trueJump, ASMIT_BEQ); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 1)); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); + eblock2->Close(eblock3, trueJump, ASMIT_BEQ); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 0)); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, cc.v & 0xff)); + eblock3->Close(falseJump, trueJump, ASMIT_BEQ); + return; + } + else if (op == IA_CMPGS || op == IA_CMPGES || op == IA_CMPGU || op == IA_CMPGEU) + { + NativeCodeBasicBlock* eblock0 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock1 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock2 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock3 = nproc->AllocateBlock(); + NativeCodeBasicBlock* nblock = nproc->AllocateBlock(); + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 3)); + if (cc.f < 0) + Close(trueJump, eblock0, ASMIT_BPL); + else + Close(falseJump, eblock0, ASMIT_BMI); + eblock0->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); + eblock0->Close(nblock, eblock1, ASMIT_BNE); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 2)); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); + eblock1->Close(nblock, eblock2, ASMIT_BNE); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 1)); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); + eblock2->Close(nblock, eblock3, ASMIT_BNE); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 0)); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, cc.v & 0xff)); + if (op == IA_CMPGES || op == IA_CMPLES || op == IA_CMPGEU || op == IA_CMPLEU) + eblock3->Close(nblock, trueJump, ASMIT_BNE); + else + eblock3->Close(nblock, falseJump, ASMIT_BNE); + + if (cc.f < 0) + nblock->Close(trueJump, falseJump, ASMIT_BCC); + else + nblock->Close(trueJump, falseJump, ASMIT_BCS); + return; + } + else + { + NativeCodeBasicBlock* eblock0 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock1 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock2 = nproc->AllocateBlock(); + NativeCodeBasicBlock* eblock3 = nproc->AllocateBlock(); + NativeCodeBasicBlock* nblock = nproc->AllocateBlock(); + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 3)); + if (cc.f < 0) + Close(falseJump, eblock0, ASMIT_BPL); + else + Close(trueJump, eblock0, ASMIT_BMI); + eblock0->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 24) & 0xff)); + eblock0->Close(nblock, eblock1, ASMIT_BNE); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 2)); + eblock1->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 16) & 0xff)); + eblock1->Close(nblock, eblock2, ASMIT_BNE); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 1)); + eblock2->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, (cc.v >> 8) & 0xff)); + eblock2->Close(nblock, eblock3, ASMIT_BNE); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, ti + 0)); + eblock3->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, cc.v & 0xff)); + if (op == IA_CMPGES || op == IA_CMPLES || op == IA_CMPGEU || op == IA_CMPLEU) + eblock3->Close(nblock, trueJump, ASMIT_BNE); + else + eblock3->Close(nblock, falseJump, ASMIT_BNE); + + if (cc.f < 0) + nblock->Close(trueJump, falseJump, ASMIT_BCS); + else + nblock->Close(trueJump, falseJump, ASMIT_BCC); + return; + } + } + } + int li = 0, ri = 1; if (op == IA_CMPLEU || op == IA_CMPGU || op == IA_CMPLES || op == IA_CMPGS) { @@ -11249,6 +11435,69 @@ void NativeCodeBasicBlock::CallFunction(InterCodeProcedure* proc, NativeCodeProc } } +NativeCodeInstruction NativeCodeBasicBlock::DecodeNative(LinkerObject* lobj, int& offset) const +{ + uint8 op = lobj->mData[offset++]; + + AsmInsData d = DecInsData[op]; + int address = 0; + LinkerObject * linkerObject = nullptr; + uint32 flags = NCIF_LOWER | NCIF_UPPER; + LinkerReference * lref; + + switch (d.mMode) + { + case ASMIM_ABSOLUTE: + case ASMIM_ABSOLUTE_X: + case ASMIM_ABSOLUTE_Y: + case ASMIM_INDIRECT: + lref = lobj->FindReference(offset); + address = lobj->mData[offset++]; + address += lobj->mData[offset++] << 8; + if (lref) + { + linkerObject = lref->mRefObject; + address = lref->mRefOffset; + } + else + flags |= NCIF_VOLATILE; + break; + case ASMIM_ZERO_PAGE: + case ASMIM_ZERO_PAGE_X: + case ASMIM_ZERO_PAGE_Y: + case ASMIM_INDIRECT_X: + case ASMIM_INDIRECT_Y: + lref = lobj->FindReference(offset); + address = lobj->mData[offset++]; + if (lref && (lref->mFlags & LREF_TEMPORARY)) + address += lobj->mTemporaries[lref->mRefOffset]; + else if (address >= BC_REG_TMP) + flags |= NCIF_VOLATILE; + break; + case ASMIM_RELATIVE: + address = lobj->mData[offset++]; + address += offset; + break; + case ASMIM_IMMEDIATE: + lref = lobj->FindReference(offset); + address = lobj->mData[offset++]; + if (lref) + { + d.mMode = ASMIM_IMMEDIATE_ADDRESS; + linkerObject = lref->mRefObject; + address = lref->mRefOffset; + if (lref->mFlags & LREF_LOWBYTE) + flags = NCIF_LOWER; + else + flags = NCIF_UPPER; + } + break; + } + + return NativeCodeInstruction(d.mType, d.mMode, address, linkerObject, flags); +} + + void NativeCodeBasicBlock::CallAssembler(InterCodeProcedure* proc, NativeCodeProcedure * nproc, const InterInstruction* ins) { if (ins->mCode == IC_ASSEMBLER) @@ -11283,7 +11532,57 @@ void NativeCodeBasicBlock::CallAssembler(InterCodeProcedure* proc, NativeCodePro flags |= NCIF_USE_CPU_REG_Y; assert(ins->mSrc[0].mLinkerObject); - mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, ins->mSrc[0].mIntConst, ins->mSrc[0].mLinkerObject, flags)); + + if (ins->mCode == IC_ASSEMBLER) + { + GrowingArray tains(NativeCodeInstruction(ASMIT_INV, ASMIM_IMPLIED)); + + uint32 uflags = 0; + bool simple = true; + int i = 0; + while (i < ins->mSrc[0].mLinkerObject->mSize) + { + NativeCodeInstruction dins = DecodeNative(ins->mSrc[0].mLinkerObject, i); + if (dins.mMode == ASMIM_RELATIVE) + simple = false; + if (dins.mType == ASMIT_JMP) + simple = false; + if (dins.mType == ASMIT_RTS && i != ins->mSrc[0].mLinkerObject->mSize) + simple = false; + if (dins.mType == ASMIT_JSR) + { + dins.mFlags |= uflags; + } + + if (dins.mType == ASMIT_BRK || dins.mMode == ASMIM_INDIRECT_X || dins.mMode == ASMIM_INDIRECT || + dins.mType == ASMIT_SEI || dins.mType == ASMIT_CLI || dins.mType == ASMIT_SED || dins.mType == ASMIT_CLD || + dins.mType == ASMIT_RTI || dins.mType == ASMIT_TXS || dins.mType == ASMIT_TSX) + simple = false; + if (dins.mFlags & NCIF_VOLATILE) + simple = false; + + if (dins.mMode == ASMIM_ZERO_PAGE && dins.mAddress >= BC_REG_WORK && dins.mAddress < BC_REG_WORK + 8) + uflags |= NICF_USE_WORKREGS; + if (dins.ChangesAccu()) + uflags |= NCIF_USE_CPU_REG_A; + if (dins.ChangesXReg()) + uflags |= NCIF_USE_CPU_REG_X; + if (dins.ChangesYReg()) + uflags |= NCIF_USE_CPU_REG_Y; + tains.Push(dins); + } + + if (simple) + { + for (int i = 0; i + 1 < tains.Size(); i++) + mIns.Push(tains[i]); + } + else + mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, ins->mSrc[0].mIntConst, ins->mSrc[0].mLinkerObject, flags)); + } + else + mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, ins->mSrc[0].mIntConst, ins->mSrc[0].mLinkerObject, flags)); + lf = ins->mSrc[0].mLinkerObject->mFlags; } else @@ -14269,6 +14568,40 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc) break; } #endif +#if 1 + if (i + 4 < mIns.Size() && + mIns[i + 0].ChangesAccuAndFlag() && + mIns[i + 1].mType == ASMIT_CMP && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 0x01 && + mIns[i + 2].mType == ASMIT_LDA && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 0x00 && + mIns[i + 3].mType == ASMIT_ADC && mIns[i + 3].mMode == ASMIM_IMMEDIATE && mIns[i + 3].mAddress == 0xff && + mIns[i + 4].mType == ASMIT_EOR && mIns[i + 4].mMode == ASMIM_IMMEDIATE && mIns[i + 4].mAddress == 0xff) + { + changed = true; + + NativeCodeBasicBlock* eblock = proc->AllocateBlock(); + NativeCodeBasicBlock* neblock = proc->AllocateBlock(); + NativeCodeBasicBlock* rblock = proc->AllocateBlock(); + + rblock->mTrueJump = mTrueJump; + rblock->mFalseJump = mFalseJump; + rblock->mBranch = mBranch; + + for (int j = i + 5; j < mIns.Size(); j++) + rblock->mIns.Push(mIns[j]); + mIns.SetSize(i + 1); + mIns[i + 0].mLive |= LIVE_CPU_REG_Z; + + mTrueJump = neblock; + mFalseJump = eblock; + mBranch = ASMIT_BNE; + + neblock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0xff)); + + eblock->Close(rblock, nullptr, ASMIT_JMP); + neblock->Close(rblock, nullptr, ASMIT_JMP); + break; + } +#endif #if 1 if (i + 12 < mIns.Size()) { @@ -22206,6 +22539,7 @@ bool NativeCodeBasicBlock::GlobalValueForwarding(void) mIns.Insert(i + 1, NativeCodeInstruction(carryop)); } + if (this->mTrueJump && this->mTrueJump->GlobalValueForwarding()) changed = true; if (this->mFalseJump && this->mFalseJump->GlobalValueForwarding()) @@ -22276,15 +22610,9 @@ bool NativeCodeBasicBlock::ValueForwarding(const NativeRegisterDataSet& data, bo mNDataSet.ResetZeroPage(ins.mAddress); if (ins.mType == ASMIT_JSR) { - for (int i = 0; i < 4; i++) - { - mNDataSet.ResetZeroPage(BC_REG_ACCU + i); - mNDataSet.ResetZeroPage(BC_REG_WORK + i); - mNDataSet.ResetZeroPage(BC_REG_ADDR + i); - } - mNDataSet.ResetZeroPage(BC_REG_WORK_Y); + mNDataSet.ResetWorkRegs(); - if (!(ins.mFlags & NCIF_RUNTIME) || (ins.mFlags & NCIF_FEXEC)) + if (!(ins.mFlags & NCIF_RUNTIME) && ins.mLinkerObject || (ins.mFlags & NCIF_FEXEC)) { if (ins.mLinkerObject && ins.mLinkerObject->mProc) { @@ -22683,7 +23011,67 @@ bool NativeCodeBasicBlock::ValueForwarding(const NativeRegisterDataSet& data, bo } } } +#if 1 + if (mTrueJump && !mFalseJump && mTrueJump->mIns.Size() == 1 && (mTrueJump->mBranch == ASMIT_BEQ || mTrueJump->mBranch == ASMIT_BNE) && mTrueJump->mIns[0].mMode == ASMIM_IMMEDIATE) + { + if (mTrueJump->mIns[0].mType == ASMIT_CMP) + { + if (mNDataSet.mRegs[CPU_REG_A].mMode == NRDM_IMMEDIATE) + { + NativeCodeBasicBlock* target; + if (mNDataSet.mRegs[CPU_REG_A].mValue == mTrueJump->mIns[0].mAddress) + { + if (mTrueJump->mBranch == ASMIT_BEQ) + target = mTrueJump->mTrueJump; + else + target = mTrueJump->mFalseJump; + } + else + { + if (mTrueJump->mBranch == ASMIT_BNE) + target = mTrueJump->mTrueJump; + else + target = mTrueJump->mFalseJump; + } + + mTrueJump->mEntryBlocks.RemoveAll(this); + mTrueJump->mNumEntries--; + target->mEntryBlocks.Push(this); + target->mNumEntries++; + mTrueJump = target; + } + } + else if (mTrueJump->mIns[0].mType == ASMIT_ORA && mTrueJump->mIns[0].mAddress == 0) + { + if (mNDataSet.mRegs[CPU_REG_A].mMode == NRDM_IMMEDIATE) + { + NativeCodeBasicBlock* target; + + if (mNDataSet.mRegs[CPU_REG_A].mValue == 0) + { + if (mTrueJump->mBranch == ASMIT_BEQ) + target = mTrueJump->mTrueJump; + else + target = mTrueJump->mFalseJump; + } + else + { + if (mTrueJump->mBranch == ASMIT_BNE) + target = mTrueJump->mTrueJump; + else + target = mTrueJump->mFalseJump; + } + + mTrueJump->mEntryBlocks.RemoveAll(this); + mTrueJump->mNumEntries--; + target->mEntryBlocks.Push(this); + target->mNumEntries++; + mTrueJump = target; + } + } + } +#endif assert(mIndex == 1000 || mNumEntries == mEntryBlocks.Size()); if (this->mTrueJump && this->mTrueJump->ValueForwarding(mNDataSet, global, final)) @@ -22768,12 +23156,12 @@ bool NativeCodeBasicBlock::RemoveSimpleLoopUnusedIndex(void) } else if (ins.mType == ASMIT_JSR) { + required += BC_REG_ADDR + 0; + required += BC_REG_ADDR + 1; for (int i = 0; i < 4; i++) - { required += BC_REG_ACCU + i; + for (int i = 0; i < 8; i++) required += BC_REG_WORK + i; - required += BC_REG_ADDR + i; - } required += BC_REG_WORK_Y; if (!(ins.mFlags & NCIF_RUNTIME) || (ins.mFlags & NCIF_FEXEC)) @@ -33290,7 +33678,7 @@ void NativeCodeProcedure::RebuildEntry(void) void NativeCodeProcedure::Optimize(void) { - CheckFunc = !strcmp(mInterProc->mIdent->mString, "hcw_irq"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "main"); #if 1 int step = 0; diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index e825ed6..5c37715 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -49,6 +49,8 @@ struct NativeRegisterDataSet void ResetIndirect(int reg); void ResetX(void); void ResetY(void); + void ResetWorkRegs(void); + void ResetWorkMasks(void); void Intersect(const NativeRegisterDataSet& set); void IntersectMask(const NativeRegisterDataSet& set); }; @@ -80,6 +82,7 @@ static const uint32 NCIF_USE_CPU_REG_Y = 0x00004000; // use a 32bit zero page register indexed by X for JSR static const uint32 NCIF_USE_ZP_32_X = 0x00008000; static const uint32 NICF_USE_ZP_ADDR = 0x00010000; +static const uint32 NICF_USE_WORKREGS = 0x00020000; class NativeCodeInstruction { @@ -182,6 +185,8 @@ public: NativeRegisterDataSet mDataSet, mNDataSet, mFDataSet; + NativeCodeInstruction DecodeNative(LinkerObject * lobj, int& offset) const; + int PutBranch(NativeCodeProcedure* proc, NativeCodeBasicBlock* target, AsmInsType code, int offset); int PutJump(NativeCodeProcedure* proc, NativeCodeBasicBlock* target, int offset); int JumpByteSize(NativeCodeBasicBlock * target, int offset); diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index b06633c..f8c9ff3 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -1594,6 +1594,24 @@ Expression* Parser::ParseSimpleExpression(void) exp = nexp->ConstantFold(mErrors); } break; + case TK_ASM: + mScanner->NextToken(); + if (mScanner->mToken == TK_OPEN_BRACE) + { + mScanner->NextToken(); + exp = ParseAssembler(); + exp->mDecType = TheSignedLongTypeDeclaration; + if (mScanner->mToken == TK_CLOSE_BRACE) + mScanner->NextToken(); + else + mErrors->Error(mScanner->mLocation, EERR_SYNTAX, "'}' expected"); + } + else + { + mErrors->Error(mScanner->mLocation, EERR_SYNTAX, "'{' expected"); + exp = new Expression(mScanner->mLocation, EX_VOID); + } + break; default: mErrors->Error(mScanner->mLocation, EERR_SYNTAX, "Term starts with invalid token", TokenNames[mScanner->mToken]); mScanner->NextToken();