Add auto loop unrolling and binary (verbatim) format linking

This commit is contained in:
drmortalwombat 2022-01-21 21:52:16 +01:00
parent cc927d778f
commit fad67d18aa
8 changed files with 200 additions and 24 deletions

View File

@ -72,6 +72,9 @@ w0:
sta $00
lda #$36
sta $01
#elif defined(OSCAR_TARGET_BIN)
#else
byt 0x0b
byt 0x08
@ -85,12 +88,13 @@ w0:
byt 0x00
byt 0x00
byt 0x00
#endif
// Clear BSS Segment
tsx
stx spentry
#endif
// Clear BSS Segment
lda #<BSSStart
sta ip
lda #>BSSStart

View File

@ -183,6 +183,7 @@ bool Compiler::GenerateCode(void)
mInterCodeGenerator->mCompilerOptions = mCompilerOptions;
mNativeCodeGenerator->mCompilerOptions = mCompilerOptions;
mInterCodeModule->mCompilerOptions = mCompilerOptions;
mInterCodeGenerator->TranslateAssembler(mInterCodeModule, dcrtstart->mValue, nullptr);
@ -341,7 +342,7 @@ bool Compiler::GenerateCode(void)
bool Compiler::WriteOutputFile(const char* targetPath)
{
char prgPath[200], mapPath[200], asmPath[200], lblPath[200], crtPath[200], intPath[200], bcsPath[200];
char prgPath[200], mapPath[200], asmPath[200], lblPath[200], intPath[200], bcsPath[200];
strcpy_s(prgPath, targetPath);
int i = strlen(prgPath);
@ -352,29 +353,35 @@ bool Compiler::WriteOutputFile(const char* targetPath)
strcpy_s(mapPath, prgPath);
strcpy_s(asmPath, prgPath);
strcpy_s(lblPath, prgPath);
strcpy_s(crtPath, prgPath);
strcpy_s(intPath, prgPath);
strcpy_s(bcsPath, prgPath);
strcat_s(prgPath, "prg");
strcat_s(mapPath, "map");
strcat_s(asmPath, "asm");
strcat_s(lblPath, "lbl");
strcat_s(crtPath, "crt");
strcat_s(intPath, "int");
strcat_s(bcsPath, "bcs");
if (mCompilerOptions & COPT_TARGET_PRG)
{
strcat_s(prgPath, "prg");
if (mCompilerOptions & COPT_VERBOSE)
printf("Writing <%s>\n", prgPath);
mLinker->WritePrgFile(prgPath);
}
else if (mCompilerOptions & COPT_TARGET_CRT16)
{
strcat_s(prgPath, "crt");
if (mCompilerOptions & COPT_VERBOSE)
printf("Writing <%s>\n", crtPath);
mLinker->WriteCrtFile(crtPath);
printf("Writing <%s>\n", prgPath);
mLinker->WriteCrtFile(prgPath);
}
else if (mCompilerOptions & COPT_TARGET_BIN)
{
strcat_s(prgPath, "bin");
if (mCompilerOptions & COPT_VERBOSE)
printf("Writing <%s>\n", prgPath);
mLinker->WriteBinFile(prgPath);
}

View File

@ -7,11 +7,13 @@ static const uint64 COPT_OPTIMIZE_INLINE = 0x00000002;
// Automatic optimization pass flags.
static const uint64 COPT_OPTIMIZE_AUTO_INLINE = 0x00000010;
static const uint64 COPT_OPTIMIZE_AUTO_INLINE_ALL = 0x00000020;
static const uint64 COPT_OPTIMIZE_AUTO_UNROLL = 0x00000040;
// Output target format flags, one bit per format.
static const uint64 COPT_TARGET_PRG = 0x100000000ULL;
static const uint64 COPT_TARGET_CRT16 = 0x200000000ULL;
static const uint64 COPT_TARGET_CRT512 = 0x400000000ULL;
static const uint64 COPT_TARGET_COPY = 0x800000000ULL;
static const uint64 COPT_TARGET_BIN = 0x1000000000ULL;
// Verbose flag. It previously had the same value as COPT_TARGET_BIN, which made
// the two options indistinguishable when tested with '&'; it now has its own bit.
// NOTE(review): confirm bit 40 is not used by a flag outside this excerpt.
static const uint64 COPT_VERBOSE = 0x10000000000ULL;
@ -23,9 +25,9 @@ static const uint64 COPT_OPTIMIZE_DEFAULT = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_
static const uint64 COPT_OPTIMIZE_SIZE = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE;
static const uint64 COPT_OPTIMIZE_SPEED = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE;
static const uint64 COPT_OPTIMIZE_SPEED = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_UNROLL;
static const uint64 COPT_OPTIMIZE_ALL = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL;
static const uint64 COPT_OPTIMIZE_ALL = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL | COPT_OPTIMIZE_AUTO_UNROLL;
struct CompilerSettings
{

View File

@ -1,6 +1,6 @@
#include "InterCode.h"
#include "CompilerTypes.h"
#include "InterCode.h"
#include <stdio.h>
#include <math.h>
@ -78,6 +78,10 @@ void IntegerValueRange::LimitMaxWeak(int64 value)
}
}
// Returns true when the minimum and the maximum are both in state S_BOUND
// and carry the same value, i.e. the range describes exactly one value.
bool IntegerValueRange::IsConstant(void) const
{
	if (mMinState != S_BOUND)
		return false;

	if (mMaxState != S_BOUND)
		return false;

	return mMinValue == mMaxValue;
}
bool IntegerValueRange::Merge(const IntegerValueRange& range)
{
@ -527,10 +531,21 @@ static void LoadConstantFold(InterInstruction* ins, InterInstruction* ains)
{
const uint8* data;
LinkerObject * lobj;
int offset;
if (ains)
data = ains->mConst.mLinkerObject->mData + ains->mConst.mIntConst;
{
lobj = ains->mConst.mLinkerObject;
offset = ains->mConst.mIntConst;
}
else
data = ins->mSrc[0].mLinkerObject->mData + ins->mSrc[0].mIntConst;
{
lobj = ins->mSrc[0].mLinkerObject;
offset = ins->mSrc[0].mIntConst;
}
data = lobj->mData + offset;
switch (ins->mDst.mType)
{
@ -540,16 +555,35 @@ static void LoadConstantFold(InterInstruction* ins, InterInstruction* ains)
ins->mConst.mIntConst = data[0];
break;
case IT_INT16:
case IT_POINTER:
ins->mConst.mIntConst = data[0] | (data[1] << 8);
ins->mConst.mIntConst = (int)data[0] | ((int)data[1] << 8);
break;
case IT_POINTER:
{
int i = 0;
while (i < lobj->mReferences.Size() && lobj->mReferences[i]->mOffset != offset)
i++;
if (i < lobj->mReferences.Size())
{
ins->mConst.mLinkerObject = lobj->mReferences[i]->mRefObject;
ins->mConst.mIntConst = lobj->mReferences[i]->mRefOffset;
ins->mConst.mMemory = IM_GLOBAL;
ins->mConst.mOperandSize = ins->mConst.mLinkerObject->mSize;
ins->mConst.mVarIndex = -1;
}
else
{
ins->mConst.mIntConst = (int)data[0] | ((int)data[1] << 8);
ins->mConst.mMemory = IM_ABSOLUTE;
}
} break;
case IT_INT32:
ins->mConst.mIntConst = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
ins->mConst.mIntConst = (int)data[0] | ((int)data[1] << 8) | ((int)data[2] << 16) | ((int)data[3] << 24);
break;
case IT_FLOAT:
{
union { float f; unsigned int v; } cc;
cc.v = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
cc.v = (int)data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
ins->mConst.mFloatConst = cc.v;
} break;
}
@ -742,6 +776,13 @@ bool InterInstruction::ReferencesTemp(int temp) const
return false;
}
// Allocates a new instruction on the heap and assigns the contents of this
// instruction to it. The caller receives the raw pointer to the copy.
InterInstruction* InterInstruction::Clone(void) const
{
	InterInstruction* const copy = new InterInstruction();

	// Default-construct first, then assign, exactly as callers have come to expect.
	*copy = *this;

	return copy;
}
bool InterInstruction::IsEqual(const InterInstruction* ins) const
{
if (mCode != ins->mCode)
@ -2047,6 +2088,15 @@ bool InterInstruction::PropagateConstTemps(const GrowingInstructionPtrArray& cte
this->ConstantFolding();
return true;
}
else if (mSrc[0].mTemp < 0 && mSrc[1].mTemp >= 0 && ctemps[mSrc[1].mTemp])
{
InterInstruction* ains = ctemps[mSrc[1].mTemp];
mSrc[1] = ains->mConst;
mSrc[1].mType = IT_POINTER;
this->ConstantFolding();
return true;
}
break;
}
@ -6774,6 +6824,77 @@ void InterCodeBasicBlock::InnerLoopOptimization(const NumberSet& aliasedParams)
}
}
void InterCodeBasicBlock::SingleBlockLoopUnrolling(void)
{
if (!mVisited)
{
mVisited = true;
if (mLoopHead && mNumEntries == 2 && mTrueJump == this)
{
int nins = mInstructions.Size();
if (nins > 3 && nins < 20)
{
if (mInstructions[nins - 1]->mCode == IC_BRANCH &&
mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[nins - 2]->mOperator == IA_CMPLU && mInstructions[nins - 2]->mDst.mTemp == mInstructions[nins - 1]->mSrc[0].mTemp &&
mInstructions[nins - 2]->mSrc[0].mTemp < 0 &&
mInstructions[nins - 3]->mCode == IC_BINARY_OPERATOR && mInstructions[nins - 3]->mOperator == IA_ADD && mInstructions[nins - 3]->mDst.mTemp == mInstructions[nins - 2]->mSrc[1].mTemp)
{
int ireg = mInstructions[nins - 3]->mDst.mTemp;
if (ireg == mInstructions[nins - 3]->mSrc[0].mTemp && mInstructions[nins - 3]->mSrc[1].mTemp < 0 ||
ireg == mInstructions[nins - 3]->mSrc[1].mTemp && mInstructions[nins - 3]->mSrc[0].mTemp < 0)
{
int i = 0;
while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ireg)
i++;
if (i == nins - 3)
{
if (mDominator->mTrueValueRange[ireg].IsConstant())
{
int start = mDominator->mTrueValueRange[ireg].mMinValue;
int end = mInstructions[nins - 2]->mSrc[0].mIntConst;
int step = mInstructions[nins - 3]->mSrc[0].mTemp < 0 ? mInstructions[nins - 3]->mSrc[0].mIntConst : mInstructions[nins - 3]->mSrc[1].mIntConst;
int count = (end - start) / step;
if (count < 5 && (nins - 3) * count < 20)
{
mInstructions.SetSize(nins - 2);
nins -= 2;
for (int i = 1; i < count; i++)
{
for (int j = 0; j < nins; j++)
{
mInstructions.Push(mInstructions[j]->Clone());
}
}
mNumEntries--;
mLoopHead = false;
mTrueJump = mFalseJump;
mFalseJump = nullptr;
InterInstruction* jins = new InterInstruction();
jins->mCode = IC_JUMP;
mInstructions.Push(jins);
}
}
}
}
}
}
}
if (mTrueJump)
mTrueJump->SingleBlockLoopUnrolling();
if (mFalseJump)
mFalseJump->SingleBlockLoopUnrolling();
}
}
void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedParams)
{
if (!mVisited)
@ -8320,6 +8441,15 @@ void InterCodeProcedure::Close(void)
DisassembleDebug("Simplified range limited relational ops");
#endif
#if 1
if (mModule->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL)
{
ResetVisited();
mEntryBlock->SingleBlockLoopUnrolling();
DisassembleDebug("Single Block loop unrolling");
}
#endif
#if 1
ResetVisited();
@ -8737,7 +8867,7 @@ void InterCodeProcedure::Disassemble(const char* name, bool dumpSets)
}
InterCodeModule::InterCodeModule(void)
: mGlobalVars(nullptr), mProcedures(nullptr)
: mGlobalVars(nullptr), mProcedures(nullptr), mCompilerOptions(0)
{
}

View File

@ -148,6 +148,8 @@ public:
bool Same(const IntegerValueRange& range) const;
bool Merge(const IntegerValueRange& range);
bool IsConstant(void) const;
void LimitMin(int64 value);
void LimitMax(int64 value);
@ -273,6 +275,8 @@ public:
bool IsEqual(const InterInstruction* ins) const;
bool IsEqualSource(const InterInstruction* ins) const;
InterInstruction* Clone(void) const;
bool ReferencesTemp(int temp) const;
bool UsesTemp(int temp) const;
@ -432,6 +436,7 @@ public:
void PeepholeOptimization(void);
void SingleBlockLoopOptimisation(const NumberSet& aliasedParams);
void SingleBlockLoopUnrolling(void);
bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*> & body);
void CollectLoopPath(const GrowingArray<InterCodeBasicBlock*>& body, GrowingArray<InterCodeBasicBlock*>& path);
void InnerLoopOptimization(const NumberSet& aliasedParams);
@ -527,4 +532,7 @@ public:
GrowingInterCodeProcedurePtrArray mProcedures;
GrowingVariableArray mGlobalVars;
uint64 mCompilerOptions;
};

View File

@ -272,8 +272,8 @@ void Linker::Link(void)
}
}
mProgramStart = 0x0801;
mProgramEnd = 0x0801;
mProgramStart = 0xffff;
mProgramEnd = 0x0000;
int address = 0;
@ -282,9 +282,14 @@ void Linker::Link(void)
LinkerRegion* lrgn = mRegions[i];
address = lrgn->mStart + lrgn->mNonzero;
if (lrgn->mNonzero && address > mProgramEnd)
if (lrgn->mNonzero)
{
if (lrgn->mStart < mProgramStart)
mProgramStart = lrgn->mStart;
if (address > mProgramEnd)
mProgramEnd = address;
}
}
// Place stack segment
@ -393,6 +398,20 @@ static const char* LinkerSectionTypeNames[] = {
"STACK"
};
// Writes the bytes of mMemory from mProgramStart up to (not including)
// mProgramEnd to the given file, without any header.
//
// Returns true when the file could be opened, every byte was written and the
// file was closed without error. An empty or inverted range (mProgramEnd not
// above mProgramStart) produces an empty file instead of handing a negative
// length to fwrite.
bool Linker::WriteBinFile(const char* filename)
{
	FILE* file = nullptr;
	fopen_s(&file, filename, "wb");
	if (!file)
		return false;

	// Clamp to zero so a range without content cannot wrap to a huge size_t
	size_t	size = mProgramEnd > mProgramStart ? size_t(mProgramEnd - mProgramStart) : 0;
	size_t	done = size > 0 ? fwrite(mMemory + mProgramStart, 1, size, file) : 0;

	// fclose flushes buffered data, so its result is part of the success state
	bool	closed = fclose(file) == 0;

	return closed && done == size;
}
bool Linker::WritePrgFile(const char* filename)
{
FILE* file;

View File

@ -147,6 +147,7 @@ public:
bool WriteAsmFile(const char* filename);
bool WriteLblFile(const char* filename);
bool WriteCrtFile(const char* filename);
bool WriteBinFile(const char* filename);
GrowingArray<LinkerReference*> mReferences;
GrowingArray<LinkerRegion*> mRegions;

View File

@ -200,6 +200,11 @@ int main(int argc, const char** argv)
compiler->mCompilerOptions |= COPT_TARGET_CRT16;
compiler->AddDefine(Ident::Unique("OSCAR_TARGET_CRT16"), "1");
}
else if (!strcmp(targetFormat, "bin"))
{
compiler->mCompilerOptions |= COPT_TARGET_BIN;
compiler->AddDefine(Ident::Unique("OSCAR_TARGET_BIN"), "1");
}
else
compiler->mErrors->Error(loc, EERR_COMMAND_LINE, "Invalid target format option", targetFormat);