Improve strength reduction in loops

This commit is contained in:
drmortalwombat 2024-05-22 20:36:22 +02:00
parent d3734a66e0
commit 3460c06508
3 changed files with 193 additions and 12 deletions

View File

@ -228,6 +228,9 @@ rem @echo off
@call :testn mmultest.c @call :testn mmultest.c
@if %errorlevel% neq 0 goto :error @if %errorlevel% neq 0 goto :error
@call :test tileexpand.cpp
@if %errorlevel% neq 0 goto :error
@exit /b 0 @exit /b 0
:error :error

83
autotest/tileexpand.cpp Normal file
View File

@ -0,0 +1,83 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
// Tile map dimensions, in tiles. RenderLogo/VerifyLogo only touch the first
// MAP_WIDTH * MAP_HEIGHT entries of TitleMap.
#define MAP_WIDTH 10
#define MAP_HEIGHT 2
// Size of one tile in character cells; a tile therefore occupies
// TITLE_TILE_WIDTH * TITLE_TILE_HEIGHT = 16 consecutive bytes of TitleTiles.
#define TITLE_TILE_WIDTH 4
#define TITLE_TILE_HEIGHT 4
// Fixed target addresses used by the test.
// NOTE(review): PTR_BUFFER and PTR_FONTCHARSET are not referenced anywhere in
// this file, and PTR_FONTCHARSET has the same value as PTR_COLOR - confirm
// whether that is intentional or a leftover from the code this test was
// reduced from.
#define PTR_SCREEN ((char *)0xc000)
#define PTR_BUFFER ((char *)0xc400)
#define PTR_COLOR ((char *)0xd800)
#define PTR_FONTCHARSET ((char *)0xd800)
// Generated tile-index table: entry i is initialised from the expression
// i * 17 (presumably truncated to the width of char - TODO confirm).
// The #for directive is a compiler-specific preprocessor loop, not standard
// C/C++; it appears to repeat the rest of the line once per value of i.
const char TitleMap[1024] = {
#for(i, 1024) i * 17,
};
// Generated tile pixel data: entry i is initialised from i * 31. 4096 bytes is
// enough for 256 tiles of 16 bytes each, i.e. every tile index an 8-bit char
// can hold (assuming 8-bit char).
const char TitleTiles[4096] = {
#for(i, 4096) i * 31,
};
// Custom screen address
extern char* const Screen = PTR_SCREEN;
// Color mem address
extern char* const Color = PTR_COLOR;
// Expands the MAP_WIDTH x MAP_HEIGHT tile map into the screen buffer.
//
// For every map cell the tile index is read from TitleMap, the matching
// 16-byte tile is located in TitleTiles, and its TITLE_TILE_WIDTH x
// TITLE_TILE_HEIGHT bytes are copied to Screen using a row stride of 40.
// The same cells of Color are set to 1.
//
// screenY: row offset added to every destination row, so output starts
//          screenY rows (of 40 bytes) below the start of Screen/Color.
//
// NOTE(review): this file was added together with a loop strength-reduction
// change in the compiler, so the index expressions below are presumably left
// un-hoisted on purpose - confirm before "simplifying" them by hand.
void RenderLogo(char screenY)
{
// Running destination pointers; they advance per tile and per map row.
char * sp = Screen;
char * cp = Color;
// Start of the current map row.
const char * mp = TitleMap;
for(char ty=0; ty < MAP_HEIGHT; ty++)
{
for(char tx=0; tx< MAP_WIDTH; tx++)
{
// Tile index for this cell, then the first byte of that tile's data
// (each tile is TITLE_TILE_WIDTH * TITLE_TILE_HEIGHT bytes long).
char ti = mp[tx];
const char* tp = TitleTiles + (TITLE_TILE_WIDTH * TITLE_TILE_HEIGHT) * ti;
for(char y=0; y<TITLE_TILE_HEIGHT; y++)
{
for(char x=0; x<TITLE_TILE_WIDTH; x++)
{
// Tile data is stored row-major with TITLE_TILE_WIDTH bytes per row;
// the destination uses 40 bytes per row.
char c = tp[TITLE_TILE_WIDTH * y + x];
sp[40 * (y + screenY) + x] = c;
cp[40 * (y + screenY) + x] = 1;
}
}
// Step right to the next tile position within the same band of rows.
sp += TITLE_TILE_WIDTH;
cp += TITLE_TILE_WIDTH;
}
// The inner loop already advanced by MAP_WIDTH * TITLE_TILE_WIDTH = 40 bytes
// (one destination row); 120 more makes 160 = TITLE_TILE_HEIGHT rows of 40,
// i.e. the start of the next band of tiles.
sp += 120;
cp += 120;
// Next row of the tile map.
mp += MAP_WIDTH;
}
}
// Checks the result of RenderLogo by recomputing, for every destination cell,
// which TitleTiles byte should have been written there and asserting that
// Screen holds exactly that byte.
//
// screenY: the same row offset that was passed to RenderLogo.
//
// Only Screen is verified; the values written to Color are not checked here.
void VerifyLogo(char screenY)
{
// dy/dx enumerate every cell of the rendered area in character cells:
// MAP_HEIGHT * TITLE_TILE_HEIGHT rows by MAP_WIDTH * TITLE_TILE_WIDTH columns.
for(char dy=0; dy<MAP_HEIGHT * TITLE_TILE_HEIGHT; dy++)
{
for(char dx=0; dx<MAP_WIDTH * TITLE_TILE_WIDTH; dx++)
{
// Split the cell position into the tile coordinate (ty, tx) and the
// position inside that tile (iy, ix).
char ty = dy / TITLE_TILE_HEIGHT, iy = dy % TITLE_TILE_HEIGHT;
char tx = dx / TITLE_TILE_WIDTH, ix = dx % TITLE_TILE_WIDTH;
// Source index: start of the tile selected by the map entry, plus the
// row-major offset inside the tile.
int si = TitleMap[MAP_WIDTH * ty + tx] * TITLE_TILE_WIDTH * TITLE_TILE_HEIGHT + TITLE_TILE_WIDTH * iy + ix;
// Destination index: 40 bytes per row, shifted down by screenY rows.
int di = 40 * (dy + screenY) + dx;
assert(Screen[di] == TitleTiles[si]);
}
}
}
// Test driver: renders the tile map one row below the top of the screen
// buffer and then verifies the rendered bytes with the same row offset.
int main(void)
{
	const char startRow = 1;

	RenderLogo(startRow);
	VerifyLogo(startRow);

	return 0;
}

View File

@ -14396,16 +14396,17 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
if (!modified) if (!modified)
{ {
int tz = tail->mInstructions.Size(); int tz = tail->mInstructions.Size();
if (tz > 2) for (int i = 0; i < tz; i++)
{ {
InterInstruction* ai = tail->mInstructions[tz - 3]; InterInstruction* ai = tail->mInstructions[i];
if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst > 0 && IsIntegerType(ai->mDst.mType) && if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst > 0 && IsIntegerType(ai->mDst.mType) &&
!tail->IsTempModifiedInRange(tz - 1, tz, ai->mDst.mTemp) && !tail->IsTempModifiedInRange(0, tz - 3, ai->mDst.mTemp)) !tail->IsTempModifiedInRange(i + 1, tz, ai->mDst.mTemp) &&
!tail->IsTempModifiedInRange(0, i - 1, ai->mDst.mTemp))
{ {
int i = 0; int j = 0;
while (i + 1 < body.Size() && !body[i]->IsTempModified(ai->mDst.mTemp)) while (j + 1 < body.Size() && !body[j]->IsTempModified(ai->mDst.mTemp))
i++; j++;
if (i + 1 == body.Size()) if (j + 1 == body.Size())
{ {
indexScale[ai->mDst.mTemp] = (int)ai->mSrc[0].mIntConst; indexScale[ai->mDst.mTemp] = (int)ai->mSrc[0].mIntConst;
} }
@ -14454,6 +14455,36 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
modified = true; modified = true;
continue; continue;
} }
else if (lins->mOperator == IA_ADD && lins->mSrc[0].mTemp >= 0 && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body))
{
if (i + 1 < mInstructions.Size() && mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp == lins->mDst.mTemp)
;
else if (lins->mSrc[1].mTemp < 0 || IsLoopInvariantTemp(lins->mSrc[1].mTemp, body))
{
int s = indexScale[lins->mSrc[0].mTemp];
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
mEntryRequiredTemps += lins->mDst.mTemp;
tail->mExitRequiredTemps += lins->mDst.mTemp;
tail->mEntryRequiredTemps += lins->mDst.mTemp;
mInstructions.Remove(i);
InterInstruction* ains = new InterInstruction(lins->mLocation, IC_BINARY_OPERATOR);
ains->mOperator = IA_ADD;
ains->mDst = lins->mDst;
ains->mSrc[1] = lins->mDst;
ains->mSrc[0].mType = lins->mDst.mType;
ains->mSrc[0].mTemp = -1;
ains->mSrc[0].mIntConst = s;
tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains);
indexScale[ains->mDst.mTemp] = s;
modified = true;
continue;
}
}
} }
else if (lins->mCode == IC_CONVERSION_OPERATOR && lins->mOperator == IA_EXT8TO16U && i + 1 < mInstructions.Size() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body)) else if (lins->mCode == IC_CONVERSION_OPERATOR && lins->mOperator == IA_EXT8TO16U && i + 1 < mInstructions.Size() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body))
{ {
@ -14461,7 +14492,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
if (nins->mCode == IC_BINARY_OPERATOR) if (nins->mCode == IC_BINARY_OPERATOR)
{ {
if (nins->mOperator == IA_MUL && nins->mSrc[0].mTemp < 0 && (nins->mDst.IsNotUByte() || !IsSimpleFactor(nins->mSrc[0].mIntConst)) && nins->mSrc[1].mTemp >= 0 && nins->mSrc[1].mFinal && nins->mDst.mTemp && IsSingleLoopAssign(i + 1, this, body)) if (nins->mOperator == IA_MUL && nins->mSrc[0].mTemp < 0 && (nins->mDst.IsNotUByte() || !IsSimpleFactor(nins->mSrc[0].mIntConst)) && nins->mSrc[1].mTemp == lins->mDst.mTemp && nins->mSrc[1].mFinal && IsSingleLoopAssign(i + 1, this, body))
{ {
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins); mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, nins); mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, nins);
@ -14486,6 +14517,40 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
continue; continue;
} }
} }
#if 1
if (nins->mCode == IC_LEA && nins->mSrc[0].mTemp == lins->mDst.mTemp)
{
}
else if (nins->mCode == IC_BINARY_OPERATOR && nins->mOperator == IA_ADD && nins->mSrc[0].mTemp == lins->mDst.mTemp && !nins->mDst.IsNotUByte())
{
}
else
{
int s = indexScale[lins->mSrc[0].mTemp];
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
mEntryRequiredTemps += lins->mDst.mTemp;
tail->mExitRequiredTemps += lins->mDst.mTemp;
tail->mEntryRequiredTemps += lins->mDst.mTemp;
mInstructions.Remove(i);
InterInstruction* ains = new InterInstruction(lins->mLocation, IC_BINARY_OPERATOR);
ains->mOperator = IA_ADD;
ains->mDst = lins->mDst;
ains->mSrc[1] = lins->mDst;
ains->mSrc[0].mType = lins->mDst.mType;
ains->mSrc[0].mTemp = -1;
ains->mSrc[0].mIntConst = s;
tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains);
indexScale[ains->mDst.mTemp] = s;
modified = true;
continue;
}
#endif
} }
else if (lins->mCode == IC_LEA) else if (lins->mCode == IC_LEA)
{ {
@ -14629,6 +14694,31 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
i++; i++;
} }
if (modified)
{
for (int j = 0; j < indexScale.Size(); j++)
{
if (indexScale[j] != 0 && !post->mEntryRequiredTemps[j])
{
int k = 0;
int tz = tail->mInstructions.Size();
while (k < tz && tail->mInstructions[k]->mDst.mTemp != j)
k++;
if (k < tz && !tail->IsTempReferencedInRange(0, k - 1, j) && !tail->IsTempReferencedInRange(k + 1, tz, j))
{
int bi = 0;
while (bi + 1 < body.Size() && !body[bi]->IsTempReferenced(j))
bi++;
if (bi + 1 == body.Size())
{
tail->mInstructions.Remove(k);
}
}
}
}
}
} }
} }
} }
@ -17539,8 +17629,13 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray
if (mshift < shift) if (mshift < shift)
{ {
mInstructions[i + 0]->mDst.mRange.mMaxValue <<= mshift;
mInstructions[i + 0]->mSrc[0].mIntConst = shift - mshift; mInstructions[i + 0]->mSrc[0].mIntConst = shift - mshift;
mInstructions[i + 1]->mSrc[0].mRange.mMaxValue = mInstructions[i + 0]->mDst.mRange.mMaxValue;
mInstructions[i + 1]->mDst.mRange.mMaxValue = mInstructions[i + 0]->mDst.mRange.mMaxValue;
mInstructions[i + 1]->mSrc[1].mIntConst = 255ULL >> shift << mshift; mInstructions[i + 1]->mSrc[1].mIntConst = 255ULL >> shift << mshift;
mInstructions[i + 2]->mSrc[1].mRange.mMaxValue = mInstructions[i + 0]->mDst.mRange.mMaxValue;
mInstructions[i + 2]->mSrc[0].mIntConst >>= mshift; mInstructions[i + 2]->mSrc[0].mIntConst >>= mshift;
} }
else if (mshift >= shift) else if (mshift >= shift)
@ -20685,7 +20780,7 @@ void InterCodeProcedure::Close(void)
{ {
GrowingTypeArray tstack(IT_NONE); GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "RenderLogo"); CheckFunc = !strcmp(mIdent->mString, "VerifyLogo");
CheckCase = false; CheckCase = false;
mEntryBlock = mBlocks[0]; mEntryBlock = mBlocks[0];