Auto place globals in zero page with -Oz option

This commit is contained in:
drmortalwombat 2023-04-10 18:26:59 +02:00
parent e23ab50512
commit 78e48c75fd
13 changed files with 199 additions and 13 deletions

View File

@ -81,6 +81,7 @@ The compiler is command line driven, and creates an executable .prg file.
* -Os: optimize for size
* -Oi: enable auto inline of small functions (part of O2/O3)
* -Oa: optimize inline assembler (part of O2/O3)
* -Oz: enable auto placement of global variables in zero page (part of O3)
* -g: create source level debug info and add source line numbers to asm listing
* -tf: target format, may be prg, crt or bin
* -tm: target machine

View File

@ -14,11 +14,12 @@
#define regs __regs
void StackStart, StackEnd, BSSStart, BSSEnd, CodeStart, CodeEnd;
void StackStart, StackEnd, BSSStart, BSSEnd, CodeStart, CodeEnd, ZeroStart, ZeroEnd;
#pragma section(code, 0x0000, CodeStart, CodeEnd)
#pragma section(stack, 0x0000, StackStart, StackEnd)
#pragma section(bss, 0x0000, BSSStart, BSSEnd)
#pragma section(zeropage, 0x0000, ZeroStart, ZeroEnd)
char spentry = 0;
@ -236,6 +237,14 @@ l2: dey
bne l2
w2:
ldx #<ZeroStart
cpx #<ZeroEnd
beq w3
l3: sta $00, x
inx
cpx #<ZeroEnd
bne l3
w3:
lda #<StackEnd - 2
sta sp
lda #>StackEnd - 2

View File

@ -251,8 +251,10 @@ bool Compiler::GenerateCode(void)
{
if (mTargetMachine == TMACH_ATARI)
regionZeroPage = mLinker->AddRegion(identZeroPage, 0x00e0, 0x00ff);
else
else if (mCompilerOptions & (COPT_EXTENDED_ZERO_PAGE | COPT_TARGET_NES))
regionZeroPage = mLinker->AddRegion(identZeroPage, 0x0080, 0x00ff);
else
regionZeroPage = mLinker->AddRegion(identZeroPage, 0x00f7, 0x00ff);
}
LinkerRegion* regionStartup = mLinker->FindRegion(identStartup);
@ -576,6 +578,7 @@ bool Compiler::GenerateCode(void)
mGlobalAnalyzer->CheckInterrupt();
mGlobalAnalyzer->AutoInline();
mGlobalAnalyzer->AutoZeroPage(mCompilationUnits->mSectionZeroPage, regionZeroPage->mEnd - regionZeroPage->mStart);
if (mCompilerOptions & COPT_VERBOSE3)
mGlobalAnalyzer->DumpCallGraph();

View File

@ -10,8 +10,9 @@ static const uint64 COPT_OPTIMIZE_AUTO_INLINE = 0x00000010;
static const uint64 COPT_OPTIMIZE_AUTO_INLINE_ALL = 0x00000020;
static const uint64 COPT_OPTIMIZE_AUTO_UNROLL = 0x00000040;
static const uint64 COPT_OPTIMIZE_CONST_EXPRESSIONS = 0x00000080;
static const uint64 COPT_OPTIMIZE_AUTO_ZEROPAGE = 0x00000100;
static const uint64 COPT_OPTIMIZE_CODE_SIZE = 0x00000100;
static const uint64 COPT_OPTIMIZE_CODE_SIZE = 0x00001000;
static const uint64 COPT_EXTENDED_ZERO_PAGE = 0x00010000;
@ -39,7 +40,7 @@ static const uint64 COPT_OPTIMIZE_SIZE = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INL
static const uint64 COPT_OPTIMIZE_SPEED = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_UNROLL | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_ASSEMBLER;
static const uint64 COPT_OPTIMIZE_ALL = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL | COPT_OPTIMIZE_AUTO_UNROLL | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_ASSEMBLER;
static const uint64 COPT_OPTIMIZE_ALL = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL | COPT_OPTIMIZE_AUTO_UNROLL | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_ASSEMBLER | COPT_OPTIMIZE_AUTO_ZEROPAGE;
enum TargetMachine
{

View File

@ -597,7 +597,7 @@ Declaration::Declaration(const Location& loc, DecType type)
: mLocation(loc), mType(type), mScope(nullptr), mData(nullptr), mIdent(nullptr), mSize(0), mOffset(0), mFlags(0), mComplexity(0), mLocalSize(0),
mBase(nullptr), mParams(nullptr), mValue(nullptr), mNext(nullptr), mVarIndex(-1), mLinkerObject(nullptr), mCallers(nullptr), mCalled(nullptr), mAlignment(1),
mInteger(0), mNumber(0), mMinValue(-0x80000000LL), mMaxValue(0x7fffffffLL), mFastCallBase(0), mFastCallSize(0), mStride(0), mStripe(1),
mCompilerOptions(0)
mCompilerOptions(0), mUseCount(0)
{}
Declaration::~Declaration(void)

View File

@ -195,6 +195,7 @@ public:
LinkerSection * mSection;
const uint8 * mData;
LinkerObject * mLinkerObject;
int mUseCount;
GrowingArray<Declaration*> mCallers, mCalled;

View File

@ -1,7 +1,7 @@
#include "GlobalAnalyzer.h"
GlobalAnalyzer::GlobalAnalyzer(Errors* errors, Linker* linker)
: mErrors(errors), mLinker(linker), mCalledFunctions(nullptr), mCallingFunctions(nullptr), mVariableFunctions(nullptr), mFunctions(nullptr), mCompilerOptions(COPT_DEFAULT)
: mErrors(errors), mLinker(linker), mCalledFunctions(nullptr), mCallingFunctions(nullptr), mVariableFunctions(nullptr), mFunctions(nullptr), mGlobalVariables(nullptr), mCompilerOptions(COPT_DEFAULT)
{
}
@ -50,6 +50,86 @@ void GlobalAnalyzer::DumpCallGraph(void)
printf("LEAF %d -> %s[%d, %08llx]\n", from->mCallers.Size(), from->mIdent->mString, from->mComplexity, from->mFlags );
}
}
for (int i = 0; i < mGlobalVariables.Size(); i++)
{
Declaration* var = mGlobalVariables[i];
printf("VAR %s[%d, %08llx, %d]\n", var->mIdent->mString, var->mSize, var->mFlags, var->mUseCount);
}
}
// Returns the weight a variable's raw use count is multiplied with when
// ranking globals for automatic zero page placement.  A larger weight means
// more benefit per byte of zero page; a weight of zero marks the type as not
// eligible.
//
//   scalar (bool, integer, float, enum) : 0x100 / size
//   pointer                             : 0x800
//   array                               : weight of element type / array size
//   struct                              : sum of member weights / (size * size),
//                                         or zero if any member is not eligible
//
// type : type declaration of the variable, a null type yields zero.
static int VarUseCountScale(Declaration* type)
{
	if (!type)
		return 0;

	if (type->mType == DT_TYPE_BOOL || type->mType == DT_TYPE_INTEGER || type->mType == DT_TYPE_FLOAT || type->mType == DT_TYPE_ENUM)
	{
		// Guard against a zero sized (incomplete) type, dividing by it is undefined
		if (type->mSize > 0)
			return 0x100 / type->mSize;
		else
			return 0;
	}
	else if (type->mType == DT_TYPE_POINTER)
		return 0x800;
	else if (type->mType == DT_TYPE_ARRAY)
	{
		if (type->mSize > 0)
			return VarUseCountScale(type->mBase) / type->mSize;
		else
			return 0;
	}
	else if (type->mType == DT_TYPE_STRUCT)
	{
		// The type kind has to be compared here, not the size: testing mSize
		// against DT_TYPE_STRUCT never selects structs on purpose

		if (type->mSize <= 0)
			return 0;

		int size = 0;
		Declaration* e = type->mParams;
		while (e)
		{
			int t = VarUseCountScale(e->mBase);
			if (t == 0)
				return 0;
			size += t;
			e = e->mNext;
		}

		// Two successive divisions give the same result as dividing by
		// size * size for positive values, without overflowing the product
		return size / type->mSize / type->mSize;
	}
	else
		return 0;
}
// Moves the most rewarding global variables into the zero page section.
//
// lszp   : linker section that selected variables are assigned to
// zpsize : number of zero page bytes available for placement
//
// Does nothing unless COPT_OPTIMIZE_AUTO_ZEROPAGE is set.  Variables that are
// already flagged DTF_ZEROPAGE reduce the budget, variables with an
// initializer value are never moved.
void GlobalAnalyzer::AutoZeroPage(LinkerSection* lszp, int zpsize)
{
	if (!(mCompilerOptions & COPT_OPTIMIZE_AUTO_ZEROPAGE))
		return;

	// Candidates, kept ordered by descending weighted use count
	GrowingArray<Declaration*> candidates(nullptr);

	for (int gi = 0; gi < mGlobalVariables.Size(); gi++)
	{
		Declaration* gvar = mGlobalVariables[gi];

		if (!(gvar->mFlags & DTF_ANALYZED))
			continue;

		if (gvar->mFlags & DTF_ZEROPAGE)
		{
			// Explicit zero page variable, just account for its space
			zpsize -= gvar->mSize;
			continue;
		}

		if (gvar->mValue)
			continue;

		// Weight the raw use count by the type of the variable, the scaled
		// value is stored back into the declaration
		gvar->mUseCount *= VarUseCountScale(gvar->mBase);
		if (gvar->mUseCount == 0)
			continue;

		// Insert behind all candidates with a strictly greater count
		int pos = 0;
		while (pos < candidates.Size() && candidates[pos]->mUseCount > gvar->mUseCount)
			pos++;
		candidates.Insert(pos, gvar);
	}

	// Hand out the remaining space in order of priority, candidates that do
	// not fit or already own a linker object are passed over
	for (int ci = 0; ci < candidates.Size() && zpsize > 0; ci++)
	{
		Declaration* cand = candidates[ci];

		if (cand->mSize <= zpsize && !cand->mLinkerObject)
		{
			cand->mSection = lszp;
			cand->mFlags |= DTF_ZEROPAGE;
			zpsize -= cand->mSize;
		}
	}
}
}
void GlobalAnalyzer::AutoInline(void)
@ -247,6 +327,8 @@ void GlobalAnalyzer::CheckInterrupt(void)
void GlobalAnalyzer::AnalyzeProcedure(Expression* exp, Declaration* dec)
{
dec->mUseCount++;
if (dec->mFlags & DTF_FUNC_ANALYZING)
{
dec->mFlags |= DTF_FUNC_RECURSIVE;
@ -328,10 +410,14 @@ void GlobalAnalyzer::AnalyzeAssembler(Expression* exp, Declaration* procDec)
void GlobalAnalyzer::AnalyzeGlobalVariable(Declaration* dec)
{
dec->mUseCount++;
if (!(dec->mFlags & DTF_ANALYZED))
{
dec->mFlags |= DTF_ANALYZED;
mGlobalVariables.Push(dec);
if (dec->mValue)
{
Analyze(dec->mValue, dec, false);

View File

@ -14,6 +14,7 @@ public:
void AutoInline(void);
void CheckFastcall(Declaration* procDec);
void CheckInterrupt(void);
void AutoZeroPage(LinkerSection * lszp, int zpsize);
void AnalyzeProcedure(Expression* exp, Declaration* procDec);
void AnalyzeAssembler(Expression* exp, Declaration* procDec);
@ -26,6 +27,7 @@ protected:
Linker* mLinker;
GrowingArray<Declaration*> mCalledFunctions, mCallingFunctions, mVariableFunctions, mFunctions;
GrowingArray<Declaration*> mGlobalVariables;
Declaration* Analyze(Expression* exp, Declaration* procDec, bool lhs);

View File

@ -21713,6 +21713,67 @@ bool NativeCodeBasicBlock::PatchCrossBlockY2XFloodExit(const NativeCodeBasicBloc
return false;
}
// Forwards copies of zero page placed globals: when a temporary zero page
// register is loaded from an absolute operand whose linker object is flagged
// LOBJF_ZEROPAGE, later reads of that register inside the same block are
// rewritten to address the linker object directly.
//
// Each block is processed once (guarded by mVisited) and then the successors
// are visited recursively.  The tracking state in mDataSet is reset at the
// start of every block, so no information is carried across block borders.
void NativeCodeBasicBlock::PropagateZPAbsolute(void)
{
if (!mVisited)
{
mVisited = true;
mDataSet.Reset();
for (int i = 0; i < mIns.Size(); i++)
{
// LDA <zero page placed object> / STA <zp register>: remember that the
// register now mirrors the object, both instructions stay unchanged
if (i + 1 < mIns.Size() &&
mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ABSOLUTE && mIns[i].mLinkerObject && (mIns[i].mLinkerObject->mFlags & LOBJF_ZEROPAGE) &&
mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE)
{
int r = mIns[i + 1].mAddress;
mDataSet.mRegs[r].mMode = NRDM_ABSOLUTE;
mDataSet.mRegs[r].mLinkerObject = mIns[i].mLinkerObject;
mDataSet.mRegs[r].mValue = mIns[i].mAddress;
mDataSet.mRegs[r].mFlags = mIns[i].mFlags;
// Skip the STA, it must not be treated as a write that invalidates the entry
i++;
}
// A call discards everything that is tracked
else if (mIns[i].mType == ASMIT_JSR)
mDataSet.Reset();
else if (mIns[i].mMode == ASMIM_ZERO_PAGE)
{
int r = mIns[i].mAddress;
// A write to the register ends the mirror relation, a read of a mirrored
// register is redirected to the original object
if (mIns[i].ChangesAddress())
mDataSet.ResetZeroPage(r);
else if (mDataSet.mRegs[r].mMode == NRDM_ABSOLUTE)
{
mIns[i].mMode = ASMIM_ABSOLUTE;
mIns[i].mLinkerObject = mDataSet.mRegs[r].mLinkerObject;
mIns[i].mAddress = mDataSet.mRegs[r].mValue;
}
}
else if (mIns[i].mMode == ASMIM_ABSOLUTE || mIns[i].mMode == ASMIM_ABSOLUTE_X || mIns[i].mMode == ASMIM_ABSOLUTE_Y)
{
// NOTE(review): ResetAbsolute presumably drops the entries that mirror the
// written object - confirm against NativeRegisterDataSet
if (mIns[i].ChangesAddress())
mDataSet.ResetAbsolute(mIns[i].mLinkerObject, mIns[i].mAddress);
}
else if (mIns[i].mMode == ASMIM_INDIRECT_Y)
{
int r = mIns[i].mAddress;
// Both bytes of the pointer register pair mirror two consecutive bytes of
// the same object: use the object as the pointer, the addressing mode
// remains indirect
if (mDataSet.mRegs[r].mMode == NRDM_ABSOLUTE && mDataSet.mRegs[r + 1].mMode == NRDM_ABSOLUTE &&
mDataSet.mRegs[r].mLinkerObject == mDataSet.mRegs[r + 1].mLinkerObject &&
mDataSet.mRegs[r].mValue + 1 == mDataSet.mRegs[r + 1].mValue)
{
mIns[i].mLinkerObject = mDataSet.mRegs[r].mLinkerObject;
mIns[i].mAddress = mDataSet.mRegs[r].mValue;
}
}
}
// Continue with the successor blocks
if (mTrueJump) mTrueJump->PropagateZPAbsolute();
if (mFalseJump) mFalseJump->PropagateZPAbsolute();
}
}
bool NativeCodeBasicBlock::IsDominatedBy(const NativeCodeBasicBlock* block) const
{
if (this == block)
@ -30287,14 +30348,14 @@ bool NativeCodeBasicBlock::BlockSizeCopyReduction(NativeCodeProcedure* proc, int
NativeCodeInstruction lins = mIns[si + 2 * i * j + 0];
NativeCodeInstruction sins = mIns[si + 2 * i * j + 1];
if (lins.mMode == ASMIM_ZERO_PAGE)
if (lins.mMode == ASMIM_ZERO_PAGE || (lins.mLinkerObject && (lins.mLinkerObject->mFlags & LOBJF_ZEROPAGE)))
lins.mMode = ASMIM_ZERO_PAGE_X;
else
{
lins.mMode = ASMIM_ABSOLUTE_X;
sz++;
}
if (sins.mMode == ASMIM_ZERO_PAGE)
if (sins.mMode == ASMIM_ZERO_PAGE || (sins.mLinkerObject && (sins.mLinkerObject->mFlags & LOBJF_ZEROPAGE)))
sins.mMode = ASMIM_ZERO_PAGE_X;
else
{
@ -30410,7 +30471,11 @@ void NativeCodeBasicBlock::BlockSizeReduction(NativeCodeProcedure* proc, int xen
while (i < mIns.Size())
{
if (i + 6 < mIns.Size() &&
if (mIns[i].mType == ASMIT_NOP)
{
i++;
}
else if (i + 6 < mIns.Size() &&
mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 &&
@ -36191,6 +36256,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE)
{
int n = 3;
if (mIns[i + 0].mLinkerObject && (mIns[i + 0].mLinkerObject->mFlags & LOBJF_ZEROPAGE))
n = 100;
if (mIns[i + 0].mFlags & NCIF_VOLATILE)
n = 1;
if (mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))
@ -36229,6 +36296,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 1].mType == ASMIT_STX && mIns[i + 1].mMode == ASMIM_ZERO_PAGE)
{
int n = 3;
if (mIns[i + 0].mLinkerObject && (mIns[i + 0].mLinkerObject->mFlags & LOBJF_ZEROPAGE))
n = 100;
if (mIns[i + 0].mFlags & NCIF_VOLATILE)
n = 1;
if (mIns[i + 1].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_Z))
@ -36254,8 +36323,12 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 0].mType == ASMIT_STA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ABSOLUTE && !(mIns[i + 1].mFlags & NCIF_VOLATILE))
{
int n = 2;
if (mIns[i + 1].mLinkerObject && (mIns[i + 1].mLinkerObject->mFlags & LOBJF_ZEROPAGE))
n = 100;
proc->ResetPatched();
if (CheckSingleUseGlobalLoad(this, mIns[i + 0].mAddress, i + 2, mIns[i + 1], 2))
if (CheckSingleUseGlobalLoad(this, mIns[i + 0].mAddress, i + 2, mIns[i + 1], n))
{
proc->ResetPatched();
if (PatchSingleUseGlobalLoad(this, mIns[i + 0].mAddress, i + 2, mIns[i + 1]))
@ -39500,6 +39573,13 @@ void NativeCodeProcedure::Optimize(void)
NativeRegisterDataSet data;
mEntryBlock->ValueForwarding(this, data, true, true);
ResetVisited();
mEntryBlock->PropagateZPAbsolute();
BuildDataFlowSets();
ResetVisited();
mEntryBlock->RemoveUnusedResultInstructions();
#if 1
ResetVisited();
mEntryBlock->BlockSizeReduction(this, -1, -1);

View File

@ -592,12 +592,13 @@ public:
bool PatchCrossBlockY2XFlood(const NativeCodeBasicBlock* block, int at);
bool PatchCrossBlockY2XFloodExit(const NativeCodeBasicBlock* block);
void PropagateZPAbsolute(void);
bool IsDominatedBy(const NativeCodeBasicBlock* block) const;
void CheckLive(void);
void CheckBlocks(bool sequence = false);
void CheckVisited(void);
};
class NativeCodeProcedure

View File

@ -189,6 +189,8 @@ int main2(int argc, const char** argv)
compiler->mCompilerOptions |= COPT_OPTIMIZE_ASSEMBLER;
else if (arg[2] == 'i')
compiler->mCompilerOptions |= COPT_OPTIMIZE_AUTO_INLINE;
else if (arg[2] == 'z')
compiler->mCompilerOptions |= COPT_OPTIMIZE_AUTO_ZEROPAGE;
}
else if (arg[1] == 'e')
{

Binary file not shown.

View File

@ -11,12 +11,12 @@
// shrink size of startup section
#pragma section(startup, 0);
#pragma region(startup, 0x0801, 0x0860, , , { startup } )
#pragma region(startup, 0x0801, 0x0870, , , { startup } )
// section for code copy
#pragma section(rcode, 0)
#pragma region(rcode, 0x0860, 0x0900, , , { rcode } )
#pragma region(rcode, 0x0870, 0x0900, , , { rcode } )
// main section to stay resident, save three bytes at the
// beginning to have space for an entry jump