diff --git a/include/crt.c b/include/crt.c
index 82fe5d7..f75e4e2 100644
--- a/include/crt.c
+++ b/include/crt.c
@@ -72,7 +72,10 @@ w0:
 		sta $00
 		lda #$36
 		sta $01
-#else
+
+#elif defined(OSCAR_TARGET_BIN)
+
+#else
 		byt 0x0b
 		byt 0x08
 		byt 0x0a
@@ -85,12 +88,13 @@ w0:
 		byt 0x00
 		byt 0x00
 		byt 0x00
-#endif
-// Clear BSS Segment
 
 		tsx
 		stx spentry
 
+#endif
+// Clear BSS Segment
+
 		lda #BSSStart
diff --git a/oscar64/Compiler.cpp b/oscar64/Compiler.cpp
index 2483e58..038f84d 100644
--- a/oscar64/Compiler.cpp
+++ b/oscar64/Compiler.cpp
@@ -183,6 +183,7 @@ bool Compiler::GenerateCode(void)
 
 	mInterCodeGenerator->mCompilerOptions = mCompilerOptions;
 	mNativeCodeGenerator->mCompilerOptions = mCompilerOptions;
+	mInterCodeModule->mCompilerOptions = mCompilerOptions;
 
 	mInterCodeGenerator->TranslateAssembler(mInterCodeModule, dcrtstart->mValue, nullptr);
 
@@ -341,7 +342,7 @@ bool Compiler::GenerateCode(void)
 
 bool Compiler::WriteOutputFile(const char* targetPath)
 {
-	char	prgPath[200], mapPath[200], asmPath[200], lblPath[200], crtPath[200], intPath[200], bcsPath[200];
+	char	prgPath[200], mapPath[200], asmPath[200], lblPath[200], intPath[200], bcsPath[200];
 
 	strcpy_s(prgPath, targetPath);
 	int		i = strlen(prgPath);
@@ -352,29 +353,35 @@ bool Compiler::WriteOutputFile(const char* targetPath)
 	strcpy_s(mapPath, prgPath);
 	strcpy_s(asmPath, prgPath);
 	strcpy_s(lblPath, prgPath);
-	strcpy_s(crtPath, prgPath);
 	strcpy_s(intPath, prgPath);
 	strcpy_s(bcsPath, prgPath);
 
-	strcat_s(prgPath, "prg");
 	strcat_s(mapPath, "map");
 	strcat_s(asmPath, "asm");
 	strcat_s(lblPath, "lbl");
-	strcat_s(crtPath, "crt");
 	strcat_s(intPath, "int");
 	strcat_s(bcsPath, "bcs");
 
 	if (mCompilerOptions & COPT_TARGET_PRG)
 	{
+		strcat_s(prgPath, "prg");
 		if (mCompilerOptions & COPT_VERBOSE)
 			printf("Writing <%s>\n", prgPath);
 		mLinker->WritePrgFile(prgPath);
 	}
 	else if (mCompilerOptions & COPT_TARGET_CRT16)
 	{
+		strcat_s(prgPath, "crt");
 		if (mCompilerOptions & COPT_VERBOSE)
-			printf("Writing <%s>\n", crtPath);
-		mLinker->WriteCrtFile(crtPath);
+			printf("Writing <%s>\n", prgPath);
+		mLinker->WriteCrtFile(prgPath);
+	}
+	else if (mCompilerOptions & COPT_TARGET_BIN)
+	{
+		strcat_s(prgPath, "bin");
+		if (mCompilerOptions & COPT_VERBOSE)
+			printf("Writing <%s>\n", prgPath);
+		mLinker->WriteBinFile(prgPath);
 	}
 
 
diff --git a/oscar64/CompilerTypes.h b/oscar64/CompilerTypes.h
index 7bde944..891d8d0 100644
--- a/oscar64/CompilerTypes.h
+++ b/oscar64/CompilerTypes.h
@@ -7,11 +7,14 @@ static const uint64 COPT_OPTIMIZE_INLINE = 0x00000002;
 
 static const uint64 COPT_OPTIMIZE_AUTO_INLINE = 0x00000010;
 static const uint64 COPT_OPTIMIZE_AUTO_INLINE_ALL = 0x00000020;
+static const uint64 COPT_OPTIMIZE_AUTO_UNROLL = 0x00000040;
 
 static const uint64 COPT_TARGET_PRG = 0x100000000ULL;
 static const uint64 COPT_TARGET_CRT16 = 0x200000000ULL;
 static const uint64 COPT_TARGET_CRT512 = 0x400000000ULL;
 static const uint64 COPT_TARGET_COPY = 0x800000000ULL;
+static const uint64 COPT_TARGET_BIN = 0x1000000000ULL;
 
-static const uint64 COPT_VERBOSE = 0x1000000000ULL;
+// Own bit above the COPT_TARGET_* flags; 0x1000000000 is now COPT_TARGET_BIN
+static const uint64 COPT_VERBOSE = 0x10000000000ULL;
 
@@ -23,9 +26,9 @@ static const uint64 COPT_OPTIMIZE_DEFAULT = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_
 
 static const uint64 COPT_OPTIMIZE_SIZE = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE;
 
-static const uint64 COPT_OPTIMIZE_SPEED = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE;
+static const uint64 COPT_OPTIMIZE_SPEED = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_UNROLL;
 
-static const uint64 COPT_OPTIMIZE_ALL = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL;
+static const uint64 COPT_OPTIMIZE_ALL = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL | COPT_OPTIMIZE_AUTO_UNROLL;
 
 struct CompilerSettings
 {
diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp
index ca3e3f0..34f7ab1 100644
--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@@ -1,6 +1,6 @@
 #include "InterCode.h"
+#include "CompilerTypes.h"
 
-#include "InterCode.h"
 #include
 #include
 
@@ -78,6 +78,11 @@ void IntegerValueRange::LimitMaxWeak(int64 value)
 	}
 }
 
+// True when both bounds are in state S_BOUND and the minimum equals the maximum
+bool IntegerValueRange::IsConstant(void) const
+{
+	return mMinState == S_BOUND && mMaxState == S_BOUND && mMinValue == mMaxValue;
+}
 
 bool IntegerValueRange::Merge(const IntegerValueRange& range)
 {
@@ -527,10 +532,21 @@ static void LoadConstantFold(InterInstruction* ins, InterInstruction* ains)
 {
 	const uint8* data;
 
-	if (ains)
-		data = ains->mConst.mLinkerObject->mData + ains->mConst.mIntConst;
+	LinkerObject	*	lobj;
+	int					offset;
+
+	if (ains)
+	{
+		lobj = ains->mConst.mLinkerObject;
+		offset = ains->mConst.mIntConst;
+	}
 	else
-		data = ins->mSrc[0].mLinkerObject->mData + ins->mSrc[0].mIntConst;
+	{
+		lobj = ins->mSrc[0].mLinkerObject;
+		offset = ins->mSrc[0].mIntConst;
+	}
+
+	data = lobj->mData + offset;
 
 	switch (ins->mDst.mType)
 	{
@@ -540,16 +556,36 @@ static void LoadConstantFold(InterInstruction* ins, InterInstruction* ains)
 		ins->mConst.mIntConst = data[0];
 		break;
 	case IT_INT16:
-	case IT_POINTER:
-		ins->mConst.mIntConst = data[0] | (data[1] << 8);
+		ins->mConst.mIntConst = (int)data[0] | ((int)data[1] << 8);
 		break;
+	case IT_POINTER:
+	{
+		int i = 0;
+		while (i < lobj->mReferences.Size() && lobj->mReferences[i]->mOffset != offset)
+			i++;
+		if (i < lobj->mReferences.Size())
+		{
+			ins->mConst.mLinkerObject = lobj->mReferences[i]->mRefObject;
+			ins->mConst.mIntConst = lobj->mReferences[i]->mRefOffset;
+			ins->mConst.mMemory = IM_GLOBAL;
+			ins->mConst.mOperandSize = ins->mConst.mLinkerObject->mSize;
+			ins->mConst.mVarIndex = -1;
+		}
+		else
+		{
+			ins->mConst.mIntConst = (int)data[0] | ((int)data[1] << 8);
+			ins->mConst.mMemory = IM_ABSOLUTE;
+		}
+
+	} break;
 	case IT_INT32:
-		ins->mConst.mIntConst = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+		ins->mConst.mIntConst = (int)data[0] | ((int)data[1] << 8) | ((int)data[2] << 16) | ((int)data[3] << 24);
 		break;
 	case IT_FLOAT:
 	{
 		union { float f; unsigned int v; } cc;
-		cc.v = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
-		ins->mConst.mFloatConst = cc.v;
+		cc.v = (unsigned int)data[0] | ((unsigned int)data[1] << 8) | ((unsigned int)data[2] << 16) | ((unsigned int)data[3] << 24);
+		// Store the float that shares these bits, not the integer value of the bit pattern
+		ins->mConst.mFloatConst = cc.f;
 	} break;
 	}
@@ -742,6 +778,14 @@ bool InterInstruction::ReferencesTemp(int temp) const
 	return false;
 }
 
+// Allocate a new instruction and copy-assign this one into it; the raw pointer is handed to the caller
+InterInstruction* InterInstruction::Clone(void) const
+{
+	InterInstruction* ins = new InterInstruction();
+	*ins = *this;
+	return ins;
+}
+
 bool InterInstruction::IsEqual(const InterInstruction* ins) const
 {
 	if (mCode != ins->mCode)
@@ -2047,6 +2091,15 @@ bool InterInstruction::PropagateConstTemps(const GrowingInstructionPtrArray& cte
 			this->ConstantFolding();
 			return true;
 		}
+		else if (mSrc[0].mTemp < 0 && mSrc[1].mTemp >= 0 && ctemps[mSrc[1].mTemp])
+		{
+			InterInstruction* ains = ctemps[mSrc[1].mTemp];
+			mSrc[1] = ains->mConst;
+			mSrc[1].mType = IT_POINTER;
+
+			this->ConstantFolding();
+			return true;
+		}
 		break;
 
 	}
@@ -6774,6 +6827,87 @@ void InterCodeBasicBlock::InnerLoopOptimization(const NumberSet& aliasedParams)
 	}
 }
 
+// Replace a small counted single block loop with straight line copies of its body.
+// Applies only when the block branches back to itself and ends in an IA_ADD with a non-temp operand,
+// an IA_CMPLU against a non-temp operand and a branch, and the dominator gives a constant start value.
+void InterCodeBasicBlock::SingleBlockLoopUnrolling(void)
+{
+	if (!mVisited)
+	{
+		mVisited = true;
+
+		if (mLoopHead && mNumEntries == 2 && mTrueJump == this)
+		{
+			int	nins = mInstructions.Size();
+
+			if (nins > 3 && nins < 20)
+			{
+				if (mInstructions[nins - 1]->mCode == IC_BRANCH &&
+					mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[nins - 2]->mOperator == IA_CMPLU && mInstructions[nins - 2]->mDst.mTemp == mInstructions[nins - 1]->mSrc[0].mTemp &&
+					mInstructions[nins - 2]->mSrc[0].mTemp < 0 &&
+					mInstructions[nins - 3]->mCode == IC_BINARY_OPERATOR && mInstructions[nins - 3]->mOperator == IA_ADD && mInstructions[nins - 3]->mDst.mTemp == mInstructions[nins - 2]->mSrc[1].mTemp)
+				{
+					int	ireg = mInstructions[nins - 3]->mDst.mTemp;
+
+					if ((ireg == mInstructions[nins - 3]->mSrc[0].mTemp && mInstructions[nins - 3]->mSrc[1].mTemp < 0) ||
+						(ireg == mInstructions[nins - 3]->mSrc[1].mTemp && mInstructions[nins - 3]->mSrc[0].mTemp < 0))
+					{
+						// The counter must not be written anywhere else in the body
+						int	i = 0;
+						while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ireg)
+							i++;
+						if (i == nins - 3)
+						{
+							if (mDominator && mDominator->mTrueValueRange[ireg].IsConstant())
+							{
+								int	start = mDominator->mTrueValueRange[ireg].mMinValue;
+								int	end = mInstructions[nins - 2]->mSrc[0].mIntConst;
+								int	step = mInstructions[nins - 3]->mSrc[0].mTemp < 0 ? mInstructions[nins - 3]->mSrc[0].mIntConst : mInstructions[nins - 3]->mSrc[1].mIntConst;
+
+								// A zero step would divide by zero, and only upward counting loops are handled
+								if (step > 0 && end > start)
+								{
+									// The exit test follows the increment, so a partial last step still runs the body: round up
+									int	count = (end - start + step - 1) / step;
+
+									if (count < 5 && (nins - 3) * count < 20)
+									{
+										// Drop compare and branch, then append count - 1 more copies of body and increment
+										mInstructions.SetSize(nins - 2);
+										nins -= 2;
+										for (int k = 1; k < count; k++)
+										{
+											for (int j = 0; j < nins; j++)
+											{
+												mInstructions.Push(mInstructions[j]->Clone());
+											}
+										}
+
+										mNumEntries--;
+										mLoopHead = false;
+										mTrueJump = mFalseJump;
+										mFalseJump = nullptr;
+
+										InterInstruction* jins = new InterInstruction();
+										jins->mCode = IC_JUMP;
+										mInstructions.Push(jins);
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+
+		if (mTrueJump)
+			mTrueJump->SingleBlockLoopUnrolling();
+		if (mFalseJump)
+			mFalseJump->SingleBlockLoopUnrolling();
+	}
+}
+
+
 void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedParams)
 {
 	if (!mVisited)
@@ -8320,6 +8454,15 @@ void InterCodeProcedure::Close(void)
 	DisassembleDebug("Simplified range limited relational ops");
 #endif
 
+#if 1
+	if (mModule->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL)
+	{
+		ResetVisited();
+		mEntryBlock->SingleBlockLoopUnrolling();
+
+		DisassembleDebug("Single Block loop unrolling");
+	}
+#endif
 
 #if 1
 	ResetVisited();
@@ -8737,7 +8880,7 @@ void InterCodeProcedure::Disassemble(const char* name, bool dumpSets)
 }
 
 InterCodeModule::InterCodeModule(void)
-	: mGlobalVars(nullptr), mProcedures(nullptr)
+	: mGlobalVars(nullptr), mProcedures(nullptr), mCompilerOptions(0)
 {
 }
 
diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h
index 7814847..037c947 100644
--- a/oscar64/InterCode.h
+++ b/oscar64/InterCode.h
@@ -148,6 +148,8 @@ public:
 	bool Same(const IntegerValueRange& range) const;
 	bool Merge(const IntegerValueRange& range);
 
+	bool IsConstant(void) const;
+
 	void LimitMin(int64 value);
 	void LimitMax(int64 value);
 
@@ -273,6 +275,8 @@ public:
 	bool IsEqual(const InterInstruction* ins) const;
 	bool IsEqualSource(const InterInstruction* ins) const;
 
+	InterInstruction* Clone(void) const;
+
 	bool ReferencesTemp(int temp) const;
 	bool UsesTemp(int temp) const;
 
@@ -432,6 +436,7 @@ public:
 
 	void PeepholeOptimization(void);
 	void SingleBlockLoopOptimisation(const NumberSet& aliasedParams);
+	void SingleBlockLoopUnrolling(void);
 	bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray & body);
 	void CollectLoopPath(const GrowingArray& body, GrowingArray& path);
 	void InnerLoopOptimization(const NumberSet& aliasedParams);
@@ -527,4 +532,7 @@ public:
 	GrowingInterCodeProcedurePtrArray mProcedures;
 
 	GrowingVariableArray mGlobalVars;
+
+	uint64 mCompilerOptions;
+
 };
diff --git a/oscar64/Linker.cpp b/oscar64/Linker.cpp
index b47295a..1752e37 100644
--- a/oscar64/Linker.cpp
+++ b/oscar64/Linker.cpp
@@ -272,8 +272,8 @@ void Linker::Link(void)
 		}
 	}
 
-	mProgramStart = 0x0801;
-	mProgramEnd = 0x0801;
+	mProgramStart = 0xffff;
+	mProgramEnd = 0x0000;
 
 	int	address = 0;
 
@@ -282,8 +282,13 @@ void Linker::Link(void)
 		LinkerRegion* lrgn = mRegions[i];
 		address = lrgn->mStart + lrgn->mNonzero;
 
-		if (lrgn->mNonzero && address > mProgramEnd)
-			mProgramEnd = address;
+		if (lrgn->mNonzero)
+		{
+			if (lrgn->mStart < mProgramStart)
+				mProgramStart = lrgn->mStart;
+			if (address > mProgramEnd)
+				mProgramEnd = address;
+		}
 	}
 
 	// Place stack segment
@@ -393,6 +398,27 @@ static const char* LinkerSectionTypeNames[] = {
 	"STACK"
 };
 
+// Write the bytes of mMemory from mProgramStart up to mProgramEnd to the file, with nothing added.
+// Returns true only when every byte was written.
+bool Linker::WriteBinFile(const char* filename)
+{
+	// Link() leaves mProgramStart above mProgramEnd when no region holds data; never pass fwrite a negative size
+	if (mProgramEnd < mProgramStart)
+		return false;
+
+	FILE* file = nullptr;
+	fopen_s(&file, filename, "wb");
+	if (file)
+	{
+		size_t	size = mProgramEnd - mProgramStart;
+		size_t	done = fwrite(mMemory + mProgramStart, 1, size, file);
+		fclose(file);
+		return done == size;
+	}
+	else
+		return false;
+}
+
 bool Linker::WritePrgFile(const char* filename)
 {
 	FILE* file;
diff --git a/oscar64/Linker.h b/oscar64/Linker.h
index 7eb38eb..93d396f 100644
--- a/oscar64/Linker.h
+++ b/oscar64/Linker.h
@@ -147,6 +147,7 @@ public:
 	bool WriteAsmFile(const char* filename);
 	bool WriteLblFile(const char* filename);
 	bool WriteCrtFile(const char* filename);
+	bool WriteBinFile(const char* filename);
 
 	GrowingArray mReferences;
 	GrowingArray mRegions;
diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp
index 167f6d9..319134c 100644
--- a/oscar64/oscar64.cpp
+++ b/oscar64/oscar64.cpp
@@ -200,6 +200,11 @@ int main(int argc, const char** argv)
 				compiler->mCompilerOptions |= COPT_TARGET_CRT16;
 				compiler->AddDefine(Ident::Unique("OSCAR_TARGET_CRT16"), "1");
 			}
+			else if (!strcmp(targetFormat, "bin"))
+			{
+				compiler->mCompilerOptions |= COPT_TARGET_BIN;
+				compiler->AddDefine(Ident::Unique("OSCAR_TARGET_BIN"), "1");
+			}
 			else
 				compiler->mErrors->Error(loc, EERR_COMMAND_LINE, "Invalid target format option", targetFormat);
 