From 640007546f07d5c07a38fff3f11a48e95cb9e9cf Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 8 Jan 2022 15:51:17 +0100 Subject: [PATCH] Add __assume builtin function --- README.md | 18 ++++--- include/c64/sprites.c | 10 ++++ oscar64/ByteCodeGenerator.cpp | 3 ++ oscar64/Declaration.h | 3 +- oscar64/GlobalAnalyzer.cpp | 1 + oscar64/InterCode.cpp | 81 ++++++++++++++++++++++++++++++- oscar64/InterCode.h | 7 ++- oscar64/InterCodeGenerator.cpp | 19 ++++++++ oscar64/NativeCodeGenerator.cpp | 84 ++++++++++++++++++++++++++++++--- oscar64/NativeCodeGenerator.h | 1 + oscar64/Parser.cpp | 7 +++ oscar64/Scanner.cpp | 3 ++ oscar64/Scanner.h | 1 + 13 files changed, 222 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 6d519a4..e5380b3 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ The goal is to implement the actual C standard and not some subset for performan ## Limits and Errors -There are still several open areas, but most targets have been reached. The current Dhrystone performance is 61 iterations per second with byte code (11434) and 270 iterations with native code (12145 Bytes). +There are still several open areas, but most targets have been reached. The current Dhrystone performance is 61 iterations per second with byte code (11504) and 278 iterations with native code (12184 Bytes). ### Language @@ -34,7 +34,7 @@ There are still several open areas, but most targets have been reached. The cur ### Standard Libraries -* No file functions +* No standard file functions, but CBM based file ops ### Runtime @@ -44,7 +44,6 @@ There are still several open areas, but most targets have been reached. The cur ### Optimizing * Complex loop optimization -* Partial block domination analysis * Auto variables placed on fixed stack for known call sequence ### Intermediate code generation @@ -54,15 +53,13 @@ There are still several open areas, but most targets have been reached. 
The cur ### Native code generation -* More byte operation optimisation required -* Simple loop detection and optimisation not complete - ## Compiler arguments The compiler is command line driven, and creates an executable .prg file. oscar64 {-i=includePath} [-o=output.prg] [-rt=runtime.c] [-e] [-n] [-dSYMBOL[=value]] {source.c} +* -v : verbose output for diagnostics * -i : additional include paths * -o : optional output file name * -rt : alternative runtime library, replaces the crt.c @@ -91,6 +88,8 @@ Will switch to the lowercase PETSCII font and translate the strings while printi PETSCII string literals can also be generated using a "p" or "P" prefix such as: printf(p"Hello World\n"); + +Screen codes can be generated similarly using an "s" or "S" prefix. Input from the console will also be translated accordingly. @@ -113,6 +112,13 @@ A section of the file can be selected by providing a limit and or an offset into }; +## Language extensions for optimization + +### Additional Optimizer information using __assume() + +The compiler can be provided with additional information using the built-in function __assume(cond). This can be useful to mark unreachable code using __assume(false), e.g. for the default of a switch statement. Another good option is to limit the value range of arguments to allow the compiler to use byte operations without the need for integer promotion. 
+ + ## Inline Assembler diff --git a/include/c64/sprites.c b/include/c64/sprites.c index 5f73073..aaa7572 100644 --- a/include/c64/sprites.c +++ b/include/c64/sprites.c @@ -12,6 +12,8 @@ void spr_init(char * screen) void spr_set(char sp, bool show, int xpos, int ypos, char image, char color, bool multi, bool xexpand, bool yexpand) { + __assume (sp < 8); + char m = 1 << sp; if (show) @@ -47,6 +49,8 @@ void spr_set(char sp, bool show, int xpos, int ypos, char image, char color, boo void spr_show(char sp, bool show) { + __assume (sp < 8); + if (show) vic.spr_enable |= 1 << sp; else @@ -55,6 +59,8 @@ void spr_show(char sp, bool show) void spr_move(char sp, int xpos, int ypos) { + __assume (sp < 8); + vic.spr_pos[sp].y = ypos; vic.spr_pos[sp].x = xpos & 0xff; if (xpos & 0x100) @@ -65,11 +71,15 @@ void spr_move(char sp, int xpos, int ypos) void spr_image(char sp, char image) { + __assume (sp < 8); + vspriteScreen[sp] = image; } void spr_color(char sp, char color) { + __assume (sp < 8); + vic.spr_color[sp] = color; } diff --git a/oscar64/ByteCodeGenerator.cpp b/oscar64/ByteCodeGenerator.cpp index 96ad0e7..57f8d82 100644 --- a/oscar64/ByteCodeGenerator.cpp +++ b/oscar64/ByteCodeGenerator.cpp @@ -4250,6 +4250,9 @@ void ByteCodeBasicBlock::Compile(InterCodeProcedure* iproc, ByteCodeProcedure* p } return; + case IC_UNREACHABLE: + this->Close(proc->exitBlock, nullptr, BC_JUMPS); + return; } i++; diff --git a/oscar64/Declaration.h b/oscar64/Declaration.h index dbcedac..16090a4 100644 --- a/oscar64/Declaration.h +++ b/oscar64/Declaration.h @@ -133,7 +133,8 @@ enum ExpressionType EX_SWITCH, EX_CASE, EX_DEFAULT, - EX_CONDITIONAL + EX_CONDITIONAL, + EX_ASSUME }; class Expression diff --git a/oscar64/GlobalAnalyzer.cpp b/oscar64/GlobalAnalyzer.cpp index 27dc3cc..5009c49 100644 --- a/oscar64/GlobalAnalyzer.cpp +++ b/oscar64/GlobalAnalyzer.cpp @@ -352,6 +352,7 @@ Declaration * GlobalAnalyzer::Analyze(Expression* exp, Declaration* procDec) break; case EX_BREAK: case EX_CONTINUE: + case 
EX_ASSUME: break; case EX_TYPE: break; diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 547b402..a317fcb 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -2501,6 +2501,9 @@ void InterInstruction::Disassemble(FILE* file) case IC_RETURN: fprintf(file, "RET"); break; + case IC_UNREACHABLE: + fprintf(file, "UNREACHABLE"); + break; } static char typechars[] = "NBCILFP"; @@ -2562,6 +2565,7 @@ InterCodeBasicBlock::InterCodeBasicBlock(void) mLoopHead = false; mChecked = false; mTraceIndex = -1; + mUnreachable = false; } InterCodeBasicBlock::~InterCodeBasicBlock(void) @@ -5371,7 +5375,7 @@ bool InterCodeBasicBlock::PushSinglePathResultInstructions(void) if (ins->mDst.mTemp >= 0 && !providedTemps[ins->mDst.mTemp] && !requiredTemps[ins->mDst.mTemp]) { int j = 0; - while (j < ins->mNumOperands && !(ins->mSrc[j].mTemp >= 0 && providedTemps[ins->mSrc[j].mTemp]) && !IsTempModifiedOnPath(ins->mSrc[j].mTemp, i + 1)) + while (j < ins->mNumOperands && (ins->mSrc[j].mTemp < 0 || !(providedTemps[ins->mSrc[j].mTemp] || IsTempModifiedOnPath(ins->mSrc[j].mTemp, i + 1)))) j++; if (j == ins->mNumOperands && IsMoveable(ins->mCode) && (ins->mCode != IC_LOAD || !hadStore)) @@ -5741,6 +5745,66 @@ bool InterCodeBasicBlock::IsEqual(const InterCodeBasicBlock* block) const return false; } +bool InterCodeBasicBlock::DropUnreachable(void) /* Processes this block once per visited-flag reset (guarded by mVisited) and recurses into its successors: a block is cut off after its first IC_UNREACHABLE, and successors that report themselves unreachable are detached. Returns mUnreachable, which is set when the block is left holding only the IC_UNREACHABLE instruction or no instruction at all. */ +{ + if (!mVisited) + { + mVisited = true; + + int i = 0; + while (i < mInstructions.Size() && mInstructions[i]->mCode != IC_UNREACHABLE) + i++; + + if (i < mInstructions.Size()) + { + // kill all instructions after this + mInstructions.SetSize(i + 1); + mFalseJump = nullptr; + mTrueJump = nullptr; + + if (mInstructions.Size() == 1) + mUnreachable = true; + } + else + { + if (mFalseJump) + { + if (mFalseJump->DropUnreachable()) /* false successor is unreachable: turn the final instruction into an IC_JUMP and keep only the true successor */ + { + mInstructions.Last()->mCode = IC_JUMP; + mInstructions.Last()->mNumOperands = 0; + mFalseJump = nullptr; + + if (mTrueJump->DropUnreachable()) /* the true successor is unreachable as well: drop the jump just created */ + { + mTrueJump = nullptr; + 
mInstructions.SetSize(mInstructions.Size() - 1); + if (mInstructions.Size() == 0) + mUnreachable = true; + } + } + else if (mTrueJump->DropUnreachable()) /* only the true successor is unreachable: jump to the former false successor instead */ + { + mInstructions.Last()->mCode = IC_JUMP; + mInstructions.Last()->mNumOperands = 0; + mTrueJump = mFalseJump; + mFalseJump = nullptr; + } + } + else if (mTrueJump && mTrueJump->DropUnreachable()) /* the single successor is unreachable: detach it and drop the final instruction */ + { + mTrueJump = nullptr; + mInstructions.SetSize(mInstructions.Size() - 1); + if (mInstructions.Size() == 0) + mUnreachable = true; + } + } + } + + return mUnreachable; +} + + bool InterCodeBasicBlock::OptimizeIntervalCompare(void) { bool changed = false; @@ -7480,6 +7544,21 @@ void InterCodeProcedure::Close(void) DisassembleDebug("Simplified range limited relational ops"); #endif +#if 1 + ResetVisited(); + mEntryBlock->DropUnreachable(); + + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); + + BuildDataFlowSets(); + TempForwarding(); + RemoveUnusedInstructions(); + + DisassembleDebug("Removed unreachable branches"); +#endif + MapVariables(); DisassembleDebug("mapped variabled"); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 158691b..ae76970 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -35,7 +35,8 @@ enum InterCode IC_RETURN_STRUCT, IC_RETURN, IC_ASSEMBLER, - IC_JUMPF + IC_JUMPF, + IC_UNREACHABLE }; enum InterType @@ -448,7 +449,7 @@ public: InterCodeBasicBlock * mTrueJump, * mFalseJump, * mDominator; GrowingInstructionArray mInstructions; - bool mVisited, mInPath, mLoopHead, mChecked, mConditionBlockTrue; + bool mVisited, mInPath, mLoopHead, mChecked, mConditionBlockTrue, mUnreachable; NumberSet mLocalRequiredTemps, mLocalProvidedTemps; NumberSet mEntryRequiredTemps, mEntryProvidedTemps; @@ -572,6 +573,8 @@ public: void CompactInstructions(void); bool OptimizeIntervalCompare(void); + + bool DropUnreachable(void); }; class InterCodeModule; diff --git a/oscar64/InterCodeGenerator.cpp b/oscar64/InterCodeGenerator.cpp index aa348ef..ebadc49 100644 --- 
a/oscar64/InterCodeGenerator.cpp +++ b/oscar64/InterCodeGenerator.cpp @@ -2605,6 +2605,25 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* return ExValue(TheVoidTypeDeclaration); } + case EX_ASSUME: + { +#if 1 + InterCodeBasicBlock* tblock = new InterCodeBasicBlock(); + proc->Append(tblock); + InterCodeBasicBlock* fblock = new InterCodeBasicBlock(); + proc->Append(fblock); + + TranslateLogic(procType, proc, block, tblock, fblock, exp->mLeft, inlineMapper); + + InterInstruction* ins = new InterInstruction(); + ins->mCode = IC_UNREACHABLE; + fblock->Append(ins); + fblock->Close(nullptr, nullptr); + + block = tblock; +#endif + return ExValue(TheVoidTypeDeclaration); + } case EX_LOGICAL_NOT: { vl = TranslateExpression(procType, proc, block, exp->mLeft, breakBlock, continueBlock, inlineMapper); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 2060722..e4e0429 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -6447,7 +6447,8 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p int l = Binlog(ins->mSrc[1].mIntConst & 0xffff); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); - mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("bitshift"))); @@ -6466,7 +6467,8 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p NativeCodeBasicBlock* eblock = nproc->AllocateBlock(); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); - mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + if 
(!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); if (ins->mSrc[1].mTemp < 0) @@ -6570,7 +6572,8 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p int l = Binlog(ins->mSrc[1].mIntConst & 0xffff); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); - mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("bitshift"))); @@ -6586,7 +6589,8 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p NativeCodeBasicBlock* eblock = nproc->AllocateBlock(); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); - mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); if (ins->mSrc[1].mTemp < 0) @@ -6730,7 +6734,8 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p int l = Binlog(ins->mSrc[1].mIntConst & 0xffff); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); - mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); NativeCodeGenerator::Runtime& 
frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("bitshift"))); @@ -6759,7 +6764,8 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p NativeCodeBasicBlock* eblock = nproc->AllocateBlock(); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); - mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); if (ins->mSrc[1].mTemp < 0) @@ -9671,6 +9677,39 @@ bool NativeCodeBasicBlock::MoveIndirectLoadStoreUp(int at) return false; } +bool NativeCodeBasicBlock::MoveLoadStoreOutOfXYRangeUp(int at) /* Tries to move the two instructions at mIns[at + 1] and mIns[at + 2] (the peephole caller passes an LDA followed by an STA) upward to just before the nearest preceding LDA, or before a CLC/SEC that directly precedes that LDA. Scans backward from at - 1; returns false as soon as an instruction fails one of the two address checks below or when no LDA is found, and true once the pair has been moved. */ +{ + int j = at - 1; + while (j >= 0) + { + if (mIns[j].MayBeChangedOnAddress(mIns[at + 2])) /* NOTE(review): presumably "mIns[j] would be affected by the store's target address" - confirm helper semantics */ + return false; + if (mIns[j].ChangesAddress() && mIns[j].SameEffectiveAddress(mIns[at + 1])) /* NOTE(review): presumably "mIns[j] writes the location the load reads" - confirm helper semantics */ + return false; + + if (mIns[j].mType == ASMIT_LDA) + { + if (j > 0 && (mIns[j - 1].mType == ASMIT_CLC || mIns[j - 1].mType == ASMIT_SEC)) + j--; + mIns.Insert(j, mIns[at + 2]); + mIns.Insert(j, mIns[at + 2]); /* NOTE(review): assumes Insert() shifts later entries up by one, so at + 2 now names the original load and it lands in front of the store inserted above - confirm */ + if (j > 0) + { + mIns[j].mLive |= mIns[j - 1].mLive; + mIns[j + 1].mLive |= mIns[j - 1].mLive; + } + mIns.Remove(at + 3); /* under the same assumption the original pair now sits at at + 3 and at + 4 */ + mIns.Remove(at + 3); + + return true; + } + + j--; + } + + return false; +} + bool NativeCodeBasicBlock::MoveAbsoluteLoadStoreUp(int at) { int j = at - 1; @@ -11702,6 +11741,20 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) #endif +#if 1 + for (int i = 0; i + 2 < mIns.Size(); i++) + { + if ((mIns[i + 0].mType == ASMIT_TAX || mIns[i + 0].mType == ASMIT_TAY) && + mIns[i + 1].mType == ASMIT_LDA && (mIns[i + 1].mMode == ASMIM_ABSOLUTE || mIns[i + 1].mMode == ASMIM_ZERO_PAGE) && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && !(mIns[i + 2].mLive & (LIVE_CPU_REG_Z | LIVE_CPU_REG_A))) + { + if (MoveLoadStoreOutOfXYRangeUp(i)) + changed = 
true; + } + } + +#endif + #if 1 if (pass > 1) { @@ -13056,6 +13109,22 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) progress = true; } #endif +#if 1 + else if ( + mIns[i + 0].mType == ASMIT_STA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + mIns[i + 1].mType == ASMIT_LDA && ((mIns[i + 1].mMode == ASMIM_ZERO_PAGE && mIns[i + 0].mAddress != mIns[i + 1].mAddress) || mIns[i + 1].mMode == ASMIM_ABSOLUTE) && + mIns[i + 2].mType == ASMIT_CLC && + mIns[i + 3].mType == ASMIT_ADC && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 0].mAddress == mIns[i + 3].mAddress && !(mIns[i + 3].mLive & LIVE_MEM)) + { /* STA zp / LDA m / CLC / ADC zp with zp not live in memory afterwards: the sum is commutative, so keep A and add m directly, dropping the store and the load. The load operand m must be compared against the stored slot (instruction i + 1, not the CLC at i + 2): if m were zp itself, removing the STA would make the ADC read a stale value. */ + mIns[i + 3] = mIns[i + 1]; + mIns[i + 3].mType = ASMIT_ADC; + + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + + progress = true; + } +#endif #if 1 else if ( mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_IMMEDIATE && @@ -14942,6 +15011,9 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode } return; + case IC_UNREACHABLE: + block->Close(mExitBlock, nullptr, ASMIT_JMP); + return; } i++; diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 66abb9f..e31788f 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -203,6 +203,7 @@ public: bool MoveIndirectLoadStoreUp(int at); bool MoveAbsoluteLoadStoreUp(int at); + bool MoveLoadStoreOutOfXYRangeUp(int at); bool MoveLoadAddImmStoreUp(int at); bool MoveCLCLoadAddZPStoreUp(int at); bool MoveLoadAddZPStoreUp(int at); diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index 46f6166..19c4d00 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -1920,6 +1920,13 @@ Expression* Parser::ParseStatement(void) exp = new Expression(mScanner->mLocation, EX_VOID); } break; + case TK_ASSUME: + mScanner->NextToken(); + exp = new Expression(mScanner->mLocation, EX_ASSUME); + exp->mLeft = ParseParenthesisExpression(); + break; + + default: exp 
= ParseExpression(); } diff --git a/oscar64/Scanner.cpp b/oscar64/Scanner.cpp index 95b3d45..bc93ea6 100644 --- a/oscar64/Scanner.cpp +++ b/oscar64/Scanner.cpp @@ -47,6 +47,7 @@ const char* TokenNames[] = "'static'", "'extern'", "'inline'", + "'__assume'", "__asm", @@ -1222,6 +1223,8 @@ void Scanner::NextRawToken(void) mToken = TK_INLINE; else if (!strcmp(tkident, "__asm")) mToken = TK_ASM; + else if (!strcmp(tkident, "__assume")) + mToken = TK_ASSUME; else { mToken = TK_IDENT; diff --git a/oscar64/Scanner.h b/oscar64/Scanner.h index e9cbda1..c4a1ec9 100644 --- a/oscar64/Scanner.h +++ b/oscar64/Scanner.h @@ -45,6 +45,7 @@ enum Token TK_STATIC, TK_EXTERN, TK_INLINE, + TK_ASSUME, TK_ASM,