Add optimization -Ox to simplify pointer arithmetic by keeping small arrays from crossing page boundaries

This commit is contained in:
drmortalwombat 2025-05-11 12:41:40 +02:00
parent b26cc4ede7
commit 05ef25a61e
12 changed files with 237 additions and 20 deletions

View File

@ -277,6 +277,15 @@ exit /b %errorlevel%
..\bin\oscar64 -e -O2 -n -dHEAPCHECK %~1
@if %errorlevel% neq 0 goto :error
..\bin\oscar64 -e -O2 -xz -Oz -n %~1
@if %errorlevel% neq 0 goto :error
..\bin\oscar64 -e -O2 -Oo -n %~1
@if %errorlevel% neq 0 goto :error
..\bin\oscar64 -e -O2 -Ox -n %~1
@if %errorlevel% neq 0 goto :error
..\bin\oscar64 -e -O0 -bc %~1
@if %errorlevel% neq 0 goto :error
@ -337,6 +346,9 @@ exit /b %errorlevel%
..\bin\oscar64 -e -O2 -Oo -n %~1
@if %errorlevel% neq 0 goto :error
..\bin\oscar64 -e -O2 -Ox -n %~1
@if %errorlevel% neq 0 goto :error
@exit /b 0
:testb
@ -379,4 +391,7 @@ exit /b %errorlevel%
..\bin\oscar64 -e -O2 -Oo -n %~1
@if %errorlevel% neq 0 goto :error
..\bin\oscar64 -e -O2 -Ox -n %~1
@if %errorlevel% neq 0 goto :error
@exit /b 0

View File

@ -15,6 +15,7 @@ static const uint64 COPT_OPTIMIZE_CONST_PARAMS = 1ULL << 9;
static const uint64 COPT_OPTIMIZE_MERGE_CALLS = 1ULL << 10;
static const uint64 COPT_OPTIMIZE_GLOBAL = 1ULL << 11;
static const uint64 COPT_OPTIMIZE_OUTLINE = 1ULL << 12;
static const uint64 COPT_OPTIMIZE_PAGE_CROSSING = 1ULL << 13;
static const uint64 COPT_OPTIMIZE_CODE_SIZE = 1ULL << 16;
static const uint64 COPT_NATIVE = 1ULL << 17;

View File

@ -3042,6 +3042,22 @@ bool Declaration::IsIndexed(void) const
return mType == DT_TYPE_ARRAY || mType == DT_TYPE_POINTER;
}
bool Declaration::ContainsArray(void) const
{
if (mType == DT_TYPE_ARRAY)
return true;
else if (mType == DT_TYPE_STRUCT)
{
Declaration* p = mParams;
while (p)
{
if (p->mType == DT_ELEMENT && p->mBase->ContainsArray())
return true;
p = p->mNext;
}
}
return false;
}
bool Declaration::IsSimpleType(void) const
{

View File

@ -127,6 +127,7 @@ static const uint64 DTF_FPARAM_UNUSED = (1ULL << 49);
static const uint64 DTF_DEPRECATED = (1ULL << 50);
static const uint64 DTF_FUNC_NO_RETURN = (1ULL << 51);
static const uint64 DTF_PLACED = (1ULL << 52);
static const uint64 DTF_NO_PAGE_CROSS = (1ULL << 53);
@ -333,6 +334,7 @@ public:
bool IsSimpleType(void) const;
bool IsReference(void) const;
bool IsIndexed(void) const;
bool ContainsArray(void) const;
void SetDefined(void);

View File

@ -17506,7 +17506,7 @@ void InterCodeBasicBlock::RemoveUnusedMallocs(void)
}
}
void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPtrArray& tvalue)
void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPtrArray& tvalue, bool loops)
{
if (!mVisited)
{
@ -17536,6 +17536,57 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt
}
}
}
// Loop-aware case: only taken when the caller passed `loops` and this block
// has exactly two entry blocks. One entry is expected to be mLoopPrefix, the
// other is treated as the loop tail.
// NOTE(review): if the checks on `post` or CollectSingleHeadLoopBody fail,
// ltvalue is left untouched by this branch, whereas the plain else branch
// that follows clears it — confirm this is intended.
else if (loops && mNumEntries == 2)
{
InterCodeBasicBlock* tail, * post;
// tail = the entry block that is not the loop prefix
if (mEntryBlocks[0] == mLoopPrefix)
tail = mEntryBlocks[1];
else
tail = mEntryBlocks[0];
// post = the successor of tail that does not jump back to this block
if (tail->mTrueJump == this)
post = tail->mFalseJump;
else
post = tail->mTrueJump;
// Only continue when the exit block exists and is reached from a single entry
if (post && post->mNumEntries == 1)
{
GrowingArray<InterCodeBasicBlock*> body(nullptr);
// presumably fills `body` with the blocks of the loop headed by this block
// and ending in tail — TODO confirm against CollectSingleHeadLoopBody
if (tail->CollectSingleHeadLoopBody(this, tail, body))
{
// Check every temp that still has a tracked value on entry
for (int i = 0; i < ltvalue.Size(); i++)
{
if (ltvalue[i])
{
bool fail = false;
// Scan all instructions of all collected body blocks for writes to temp i
for (int k = 0; k < body.Size() && !fail; k++)
{
InterCodeBasicBlock* b = body[k];
for (int j = 0; j < b->mInstructions.Size() && !fail; j++)
{
InterInstruction* ins = b->mInstructions[j];
if (ins->mDst.mTemp == i)
{
// A write is tolerated only if it is an IC_LEA whose mSrc[1] is the
// same temp (the temp is updated relative to itself); any other
// write invalidates the tracked value.
if (ins->mCode == IC_LEA && ins->mSrc[1].mTemp == i)
;
else
fail = true;
}
}
}
if (fail)
{
// Temp is overwritten inside the loop: drop its tracked value
ltvalue[i] = nullptr;
}
}
}
}
}
}
else
ltvalue.Clear();
}
@ -17567,18 +17618,22 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt
for (int j = 0; j < ins->mNumOperands; j++)
{
if (ins->mSrc[j].mTemp > 0 && ltvalue[ins->mSrc[j].mTemp] && ins->mSrc[j].mType == IT_POINTER)
if (ins->mSrc[j].mTemp >= 0 && ltvalue[ins->mSrc[j].mTemp] && ins->mSrc[j].mType == IT_POINTER)
{
ins->mSrc[j].mRestricted = ltvalue[ins->mSrc[j].mTemp]->mDst.mRestricted;
ins->mSrc[j].mMemoryBase = ltvalue[ins->mSrc[j].mTemp]->mDst.mMemoryBase;
ins->mSrc[j].mVarIndex = ltvalue[ins->mSrc[j].mTemp]->mDst.mVarIndex;
ins->mSrc[j].mLinkerObject = ltvalue[ins->mSrc[j].mTemp]->mDst.mLinkerObject;
if (ins->mSrc[j].mMemory == IM_NONE && ins->mSrc[j].mMemoryBase != IM_NONE)
ins->mSrc[j].mMemory = IM_INDIRECT;
assert(ins->mSrc[j].mMemoryBase != IM_LOCAL || ins->mSrc[j].mVarIndex >= 0);
}
}
if (ins->mCode == IC_LEA)
{
ins->mDst.mMemory = ins->mSrc[1].mMemory;
ins->mDst.mRestricted = ins->mSrc[1].mRestricted;
if (ins->mSrc[1].mMemory != IM_INDIRECT)
ins->mSrc[1].mMemoryBase = ins->mSrc[1].mMemory;
@ -17588,6 +17643,7 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt
}
else if (ins->mCode == IC_LOAD_TEMPORARY)
{
ins->mDst.mMemory = ins->mSrc[0].mMemory;
ins->mDst.mRestricted = ins->mSrc[0].mRestricted;
ins->mDst.mMemoryBase = ins->mSrc[0].mMemoryBase;
ins->mDst.mVarIndex = ins->mSrc[0].mVarIndex;
@ -17595,6 +17651,7 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt
}
else if (ins->mCode == IC_CONSTANT)
{
ins->mDst.mMemory = ins->mConst.mMemory;
ins->mDst.mRestricted = ins->mConst.mRestricted;
ins->mDst.mMemoryBase = ins->mConst.mMemory;
ins->mDst.mVarIndex = ins->mConst.mVarIndex;
@ -17611,8 +17668,8 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt
}
if (mTrueJump) mTrueJump->PropagateMemoryAliasingInfo(ltvalue);
if (mFalseJump) mFalseJump->PropagateMemoryAliasingInfo(ltvalue);
if (mTrueJump) mTrueJump->PropagateMemoryAliasingInfo(ltvalue, loops);
if (mFalseJump) mFalseJump->PropagateMemoryAliasingInfo(ltvalue, loops);
}
}
@ -22801,12 +22858,21 @@ void InterCodeProcedure::EliminateDoubleLoopCounter(void)
}
void InterCodeProcedure::PropagateMemoryAliasingInfo(void)
void InterCodeProcedure::PropagateMemoryAliasingInfo(bool loops)
{
GrowingInstructionPtrArray tvalue(nullptr);
if (loops)
{
BuildTraces(0);
BuildLoopPrefix();
ResetEntryBlocks();
ResetVisited();
mEntryBlock->PropagateMemoryAliasingInfo(tvalue);
mEntryBlock->CollectEntryBlocks(nullptr);
}
ResetVisited();
mEntryBlock->PropagateMemoryAliasingInfo(tvalue, loops);
Disassemble("PropagateMemoryAliasingInfo");
}
@ -23401,7 +23467,7 @@ void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory)
bool changed;
do {
PropagateMemoryAliasingInfo();
PropagateMemoryAliasingInfo(false);
GrowingInstructionPtrArray gipa(nullptr);
ResetVisited();
@ -23506,7 +23572,7 @@ void InterCodeProcedure::Close(void)
{
GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "mbox::show");
CheckFunc = !strcmp(mIdent->mString, "mbox::configure_animations");
CheckCase = false;
mEntryBlock = mBlocks[0];
@ -23984,6 +24050,8 @@ void InterCodeProcedure::Close(void)
SingleTailLoopOptimization(paramMemory);
BuildDataFlowSets();
PropagateMemoryAliasingInfo(true);
#if 1
ExpandSelect();
@ -24179,6 +24247,8 @@ void InterCodeProcedure::Close(void)
#endif
PropagateMemoryAliasingInfo(true);
ResetVisited();
mEntryBlock->SimplifyIntegerRangeRelops();

View File

@ -638,7 +638,7 @@ public:
void SingleLoopCountZeroCheck(void);
bool PostDecLoopOptimization(void);
void PropagateMemoryAliasingInfo(const GrowingInstructionPtrArray& tvalue);
void PropagateMemoryAliasingInfo(const GrowingInstructionPtrArray& tvalue, bool loops);
void RemoveUnusedMallocs(void);
bool PullStoreUpToConstAddress(void);
@ -804,7 +804,7 @@ protected:
void CheckUsedDefinedTemps(void);
void WarnUsedUndefinedVariables(void);
void WarnInvalidValueRanges(void);
void PropagateMemoryAliasingInfo(void);
void PropagateMemoryAliasingInfo(bool loops);
void MoveConditionsOutOfLoop(void);
void ShortcutConstBranches(void);
void ShortcutDuplicateBranches(void);

View File

@ -782,6 +782,16 @@ void InterCodeGenerator::InitGlobalVariable(InterCodeModule * mod, Declaration*
var->mLinkerObject->mFlags |= LOBJF_CONST;
if (dec->mFlags & DTF_ZEROPAGE)
var->mLinkerObject->mFlags |= LOBJF_ZEROPAGE;
// Declaration explicitly flagged DTF_NO_PAGE_CROSS: mark the linker object
// with both LOBJF_NEVER_CROSS and LOBJF_NO_CROSS.
if (dec->mFlags & DTF_NO_PAGE_CROSS)
var->mLinkerObject->mFlags |= LOBJF_NEVER_CROSS | LOBJF_NO_CROSS;
// With COPT_OPTIMIZE_PAGE_CROSSING enabled, apply the same flags automatically
// to variables of 2..256 bytes whose base type contains an array.
if (mCompilerOptions & COPT_OPTIMIZE_PAGE_CROSSING)
{
if (dec->mSize <= 256 && dec->mSize > 1)
{
if (dec->mBase->ContainsArray())
var->mLinkerObject->mFlags |= LOBJF_NEVER_CROSS | LOBJF_NO_CROSS;
}
}
var->mIndex = mod->mGlobalVars.Size();
var->mDeclaration = dec;

View File

@ -650,7 +650,7 @@ bool LinkerRegion::Allocate(Linker * linker, LinkerObject* lobj, bool merge, boo
int start = (mFreeChunks[i].mStart + lobj->mAlignment - 1) & ~(lobj->mAlignment - 1);
int end = start + lobj->mSize;
if (!(linker->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) && (lobj->mFlags & LOBJF_NO_CROSS) && lobj->mSize <= 256 && (start & 0xff00) != ((end - 1) & 0xff00) && !(lobj->mSection->mFlags & LSECF_PACKED))
if (((lobj->mFlags & LOBJF_NEVER_CROSS) || !(linker->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) && (lobj->mFlags & LOBJF_NO_CROSS) && !(lobj->mSection->mFlags & LSECF_PACKED)) && lobj->mSize <= 256 && (start & 0xff00) != ((end - 1) & 0xff00))
;
else if (end <= mFreeChunks[i].mEnd)
{
@ -702,7 +702,7 @@ bool LinkerRegion::Allocate(Linker * linker, LinkerObject* lobj, bool merge, boo
int start = (mStart + mUsed + lobj->mAlignment - 1) & ~(lobj->mAlignment - 1);
int end = start + lobj->mSize;
if (!(linker->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) && !retry && (lobj->mFlags & LOBJF_NO_CROSS) && !(lobj->mFlags & LOBJF_FORCE_ALIGN) && lobj->mSize <= 256 && (start & 0xff00) != ((end - 1) & 0xff00) && !(lobj->mSection->mFlags & LSECF_PACKED))
if (((lobj->mFlags & LOBJF_NEVER_CROSS) || !(linker->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) && !retry && (lobj->mFlags & LOBJF_NO_CROSS) && !(lobj->mSection->mFlags & LSECF_PACKED)) && !(lobj->mFlags & LOBJF_FORCE_ALIGN) && lobj->mSize <= 256 && (start & 0xff00) != ((end - 1) & 0xff00))
{
start = (start + 0x00ff) & 0xff00;
end = start + lobj->mSize;

View File

@ -156,6 +156,7 @@ static const uint32 LOBJF_NO_CROSS = 0x00000080;
static const uint32 LOBJF_ZEROPAGE = 0x00000100;
static const uint32 LOBJF_FORCE_ALIGN = 0x00000200;
static const uint32 LOBJF_ZEROPAGESET = 0x00000400;
static const uint32 LOBJF_NEVER_CROSS = 0x00000800;
static const uint32 LOBJF_ARG_REG_A = 0x00001000;
static const uint32 LOBJF_ARG_REG_X = 0x00002000;

View File

@ -3617,6 +3617,62 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT
data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_C].mValue = data.mRegs[CPU_REG_A].mValue >= mAddress;
}
// Operand is an immediate address and the tracked A register also holds an
// immediate address: try to derive a known value for CPU_REG_Z.
// presumably mValue == 0 means "bytes equal" and non-zero "bytes differ" —
// TODO confirm how CPU_REG_Z.mValue is consumed.
else if (mMode == ASMIM_IMMEDIATE_ADDRESS && data.mRegs[CPU_REG_A].mMode == NRDM_IMMEDIATE_ADDRESS)
{
if (mLinkerObject == data.mRegs[CPU_REG_A].mLinkerObject)
{
if (mLinkerObject)
{
// Both addresses refer to the same linker object
if (mLinkerObject->mFlags & LOBJF_NEVER_CROSS)
{
// Object is flagged as never crossing a page
if (mFlags & NCIF_LOWER)
{
// Lower byte: the byte difference is the low byte of the address difference
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_Z].mValue = (data.mRegs[CPU_REG_A].mValue - mAddress) & 0xff;
}
else
{
// Not the lower byte (presumably the upper byte): treated as always
// equal, since both addresses lie in the same never-crossing object
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_Z].mValue = 0;
}
}
else
{
// Same object, but it may cross a page
if (mFlags & NCIF_LOWER)
{
// Lower byte: recorded as 0 when the low bytes match, 1 otherwise
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_Z].mValue = (data.mRegs[CPU_REG_A].mValue & 0xff) == (mAddress & 0xff) ? 0 : 1;
}
else if (data.mRegs[CPU_REG_A].mValue == mAddress)
{
// Identical address value: recorded as equal
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_Z].mValue = 0;
}
else
// Different address values in a possibly page-crossing object: result unknown
data.mRegs[CPU_REG_Z].Reset();
}
}
else
{
// Neither side has a linker object (both linker object pointers are null)
if (mFlags & NCIF_LOWER)
{
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_Z].mValue = (data.mRegs[CPU_REG_A].mValue - mAddress) & 0xff;
}
else
{
// NOTE(review): this shifts the full difference, so a borrow out of the
// low byte can make the result non-zero even when both upper bytes are
// equal (e.g. 0x0100 vs 0x01ff) — confirm whether the difference of the
// individual upper bytes was intended.
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_Z].mValue = ((data.mRegs[CPU_REG_A].mValue - mAddress) >> 8) & 0xff;
}
}
}
else
{
// Linker objects differ (this also covers one side being null): recorded
// as a known non-zero result.
// NOTE(review): single bytes of addresses in different objects could
// still coincide — confirm this assumption is safe.
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
data.mRegs[CPU_REG_Z].mValue = 1;
}
// Carry is not derived in this case
data.mRegs[CPU_REG_C].Reset();
}
else if (mMode == ASMIM_IMMEDIATE && mAddress == 0)
{
data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
@ -14223,6 +14279,10 @@ void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const
}
else
{
// `crossing` stays true unless the base operand refers to a global whose
// linker object is flagged LOBJF_NEVER_CROSS.
bool crossing = true;
if (ins->mSrc[1].mMemoryBase == IM_GLOBAL && ins->mSrc[1].mLinkerObject && (ins->mSrc[1].mLinkerObject->mFlags & LOBJF_NEVER_CROSS))
crossing = false;
if (ins->mSrc[0].mTemp >= 0 || ins->mSrc[0].mIntConst != 0)
mIns.Push(NativeCodeInstruction(ins, isub ? ASMIT_SEC : ASMIT_CLC, ASMIM_IMPLIED));
@ -14240,6 +14300,8 @@ void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const
mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp] + 1));
if (crossing)
{
if (ins->mSrc[0].mTemp < 0)
{
if (ins->mSrc[0].mIntConst)
@ -14249,6 +14311,7 @@ void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const
mIns.Push(NativeCodeInstruction(ins, iop, ASMIM_IMMEDIATE, 0));
else
mIns.Push(NativeCodeInstruction(ins, iop, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ireg] + 1));
}
mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mDst.mTemp] + 1));
}
@ -50945,6 +51008,22 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterateN(int i, int pass)
}
}
// Pattern (only for pass > 3):
//   CLC
//   ADC #<immediate address of object>
//   STA zp
//   LDA #<immediate address of the same object>
//   ADC #0
//   STA zp + 1
// When the object is flagged LOBJF_NEVER_CROSS, the ADC #0 is replaced by a NOP.
// presumably safe because the first addition is assumed not to carry for a
// never-crossing object — TODO confirm.
// NOTE(review): only mIns[i + 0] .. mIns[i + 5] are inspected, yet the guard
// requires i + 6 to be a valid index — confirm whether that is deliberate.
if (i + 6 < mIns.Size() && pass > 3 &&
mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE_ADDRESS && mIns[i + 1].mLinkerObject &&
mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].mMode == ASMIM_IMMEDIATE_ADDRESS && mIns[i + 1].mLinkerObject == mIns[i + 3].mLinkerObject &&
mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_IMMEDIATE && mIns[i + 4].mAddress == 0 &&
mIns[i + 5].mType == ASMIT_STA && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 5].mAddress == mIns[i + 2].mAddress + 1)
{
if (mIns[i + 1].mLinkerObject->mFlags & LOBJF_NEVER_CROSS)
{
// Turn the ADC #0 into a NOP and report that a change was made
mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED;
return true;
}
}
if (i + 7 < mIns.Size() && pass > 3 &&
mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE &&
@ -54650,7 +54729,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
mInterProc->mLinkerObject->mNativeProc = this;
CheckFunc = !strcmp(mIdent->mString, "mbox::show");
CheckFunc = !strcmp(mIdent->mString, "main");
int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks];

View File

@ -13728,6 +13728,27 @@ void Parser::ParsePragma(void)
ConsumeToken(TK_CLOSE_PARENTHESIS);
}
// #pragma nocross(identifier): sets DTF_NO_PAGE_CROSS on the named declaration
else if (!strcmp(mScanner->mTokenIdent->mString, "nocross"))
{
mScanner->NextToken();
ConsumeToken(TK_OPEN_PARENTHESIS);
if (mScanner->mToken == TK_IDENT)
{
// The identifier is looked up in mGlobals
Declaration* dec = mGlobals->Lookup(mScanner->mTokenIdent);
// Accepted targets: a variable with DTF_GLOBAL set, or a DT_CONST_FUNCTION
if (dec && dec->mType == DT_VARIABLE && (dec->mFlags & DTF_GLOBAL))
dec->mFlags |= DTF_NO_PAGE_CROSS;
else if (dec && dec->mType == DT_CONST_FUNCTION)
dec->mFlags |= DTF_NO_PAGE_CROSS;
else
// Also reached when the identifier exists but is neither of the accepted kinds
mErrors->Error(mScanner->mLocation, EERR_OBJECT_NOT_FOUND, "Variable not found");
// The identifier token is consumed whether or not the lookup succeeded
mScanner->NextToken();
}
else
mErrors->Error(mScanner->mLocation, EERR_PRAGMA_PARAMETER, "Variable name expected");
ConsumeToken(TK_CLOSE_PARENTHESIS);
}
else if (!strcmp(mScanner->mTokenIdent->mString, "align"))
{
mScanner->NextToken();

View File

@ -265,6 +265,8 @@ int main2(int argc, const char** argv)
compiler->mCompilerOptions |= COPT_OPTIMIZE_MERGE_CALLS;
else if (arg[2] == 'o' && !arg[3])
compiler->mCompilerOptions |= COPT_OPTIMIZE_OUTLINE;
// 'x' with nothing following it: enable the page crossing optimization
else if (arg[2] == 'x' && !arg[3])
compiler->mCompilerOptions |= COPT_OPTIMIZE_PAGE_CROSSING;
else
compiler->mErrors->Error(loc, EERR_COMMAND_LINE, "Invalid command line argument", arg);
}