From c8abb42c3cf0e328de28d0c28e38cdc03879f4af Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 19 Jan 2025 18:10:02 +0100 Subject: [PATCH] Optimize outliner compile time performance --- oscar64.md | 3 + oscar64/NativeCodeGenerator.cpp | 18 ++--- oscar64/NativeCodeOutliner.cpp | 130 +++++++++++++++++++++++--------- oscar64/NativeCodeOutliner.h | 6 +- 4 files changed, 112 insertions(+), 45 deletions(-) diff --git a/oscar64.md b/oscar64.md index 357f092..e747fa9 100644 --- a/oscar64.md +++ b/oscar64.md @@ -128,6 +128,7 @@ The compiler is command line driven, and creates an executable .prg file. * -Oa : optimize inline assembler (part of O2/O3) * -Oz : enable auto placement of global variables in zero page (part of O3) * -Op : optimize constant parameters +* -Oo : optimize size using "outliner" (extract repeated code sequences into functions) * -g : create source level debug info and add source line numbers to asm listing * -gp : create source level debug info and add source line numbers to asm listing and static profile data * -tf : target format, may be prg, crt or bin @@ -483,6 +484,8 @@ Set optimizer options that are active for the functions after it * maxinline : inline any function suitable * constparams : enable constant parameter folding into called functions * noconstparams : disable constant parameter folding into called functions +* outline : enable outliner +* nooutline : disable outliner * 0 : no optimization * 1 : default optimizations * 2 : aggressive optimizations diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 33506ac..108888d 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -5015,9 +5015,12 @@ void NativeCodeInstruction::FilterRegUsage(NumberSet& requiredTemps, NumberSet& uint32 NativeCodeInstruction::CodeHash(void) const { - uint32 hash = mType + 0x20 * mMode + 0x100 * mAddress; + uint32 hash = mType + 137 * mMode + 4111 * mAddress; if (mLinkerObject) - hash += mLinkerObject->mID * 0x1000; + hash += mLinkerObject->mID * 135123; + hash ^= hash >> 13; + hash ^= hash << 11; + hash ^= hash >> 23; return hash; } @@ -55514,10 +55517,10 @@ void NativeCodeGenerator::OutlineFunctions(void) tree->LongestMatch(mapper, 0, 0, lsize, ltree); if (lsize > 6) { - SuffixTree* leaf = ltree; - while (leaf->mFirst) - leaf = leaf->mFirst; - NativeCodeBasicBlock* block = mapper.mBlocks[-(1 + leaf->mSeg[leaf->mSize - 1])]; + ExpandingArray segs; + ltree->ReplaceCalls(mapper, segs); + + NativeCodeBasicBlock* block = segs[0].mBlock; NativeCodeProcedure* nproc = new NativeCodeProcedure(this); @@ -55541,9 +55544,6 @@ void NativeCodeGenerator::OutlineFunctions(void) else nblock->mIns.Push(NativeCodeInstruction(nblock->mIns[nblock->mIns.Size() - 1].mIns, ASMIT_RTS)); - ExpandingArray segs; - ltree->ReplaceCalls(mapper, segs); - segs.Sort([](const SuffixSegment& l, const SuffixSegment& r)->bool { return l.mBlock == r.mBlock ? l.mStart > r.mStart : ptrdiff_t(l.mBlock) < ptrdiff_t(r.mBlock); }); diff --git a/oscar64/NativeCodeOutliner.cpp b/oscar64/NativeCodeOutliner.cpp index 0e8de71..0069e52 100644 --- a/oscar64/NativeCodeOutliner.cpp +++ b/oscar64/NativeCodeOutliner.cpp @@ -62,20 +62,32 @@ SuffixTree::SuffixTree(const int* str, int s, SuffixTree* n) mFirst = nullptr; } +SuffixTree::~SuffixTree(void) +{ + delete[] mFirst; +} + void SuffixTree::AddParents(SuffixTree* parent) { mParent = parent; - SuffixTree* n = mFirst; - while (n) + if (mFirst) { - n->AddParents(this); - n = n->mNext; + for (int i = 0; i < HashSize; i++) + { + SuffixTree* n = mFirst[i]; + while (n) + { + n->AddParents(this); + n = n->mNext; + } + } } } void SuffixTree::AddSuffix(const int* str, int s) { - SuffixTree* c = mFirst; + int hi = str[0] & (HashSize - 1); + SuffixTree* c = mFirst ? mFirst[hi] : nullptr; while (c && c->mSeg[0] != str[0]) c = c->mNext; @@ -88,15 +100,39 @@ void SuffixTree::AddSuffix(const int* str, int s) c->AddSuffix(str + k, s - k); else { - SuffixTree* t = c->mFirst; - c->mFirst = new SuffixTree(c->mSeg + k, c->mSize - k, nullptr); - c->mFirst->mFirst = t; - c->mFirst = new SuffixTree(str + k, s - k, c->mFirst); + SuffixTree * n = new SuffixTree(c->mSeg + k, c->mSize - k, nullptr); + if (c->mFirst) + { + n->mFirst = new SuffixTree * [HashSize]; + for (int i = 0; i < HashSize; i++) + { + n->mFirst[i] = c->mFirst[i]; + c->mFirst[i] = nullptr; + } + } + else + { + c->mFirst = new SuffixTree * [HashSize]; + for (int i = 0; i < HashSize; i++) + c->mFirst[i] = nullptr; + } + c->mFirst[c->mSeg[k] & (HashSize - 1)] = n; + int hk = str[k] & (HashSize - 1); + c->mFirst[hk] = new SuffixTree(str + k, s - k, c->mFirst[hk]); c->mSize = k; } } else - mFirst = new SuffixTree(str, s, mFirst); + { + if (!mFirst) + { + mFirst = new SuffixTree * [HashSize]; + for (int i = 0; i < HashSize; i++) + mFirst[i] = nullptr; + } + + mFirst[hi] = new SuffixTree(str, s, mFirst[hi]); + } } void SuffixTree::AddString(const int* str) @@ -120,11 +156,15 @@ void SuffixTree::CollectSuffix(NativeCodeMapper& map, int offset, ExpandingArray offset += mSize; if (mFirst) { - SuffixTree* t = mFirst; - while (t) + for (int i = 0; i < HashSize; i++) { - t->CollectSuffix(map, offset, segs); - t = t->mNext; + SuffixTree* t = mFirst[i]; + + while (t) + { + t->CollectSuffix(map, offset, segs); + t = t->mNext; + } } } else @@ -154,22 +194,28 @@ int SuffixTree::LongestMatch(NativeCodeMapper& map, int size, int isize, int& ms assert(size < 10000); int cnt = 0; - SuffixTree* t = mFirst; - while (t) + for (int i = 0; i < HashSize; i++) { - cnt += t->LongestMatch(map, size, isize, msize, mtree); - t = t->mNext; + SuffixTree* t = mFirst[i]; + while (t) + { + cnt += t->LongestMatch(map, size, isize, msize, mtree); + t = t->mNext; + } } if (size >= 6 && (size - 3) * (cnt - 1) > msize) { // Second run to cross check for overlaps ExpandingArray segs; - SuffixTree* t = mFirst; - while (t) + for (int i = 0; i < HashSize; i++) { - t->CollectSuffix(map, 0, segs); - t = t->mNext; + SuffixTree* t = mFirst[i]; + while (t) + { + t->CollectSuffix(map, 0, segs); + t = t->mNext; + } } segs.Sort([](const SuffixSegment& l, const SuffixSegment& r)->bool { return l.mBlock == r.mBlock ? l.mStart < r.mStart : ptrdiff_t(l.mBlock) < ptrdiff_t(r.mBlock); @@ -214,13 +260,18 @@ void SuffixTree::Print(FILE * file, NativeCodeMapper& map, int depth) } fprintf(file, "\n"); - SuffixTree* n = mFirst; - while (n) + if (mFirst) { - n->Print(file, map, depth + 1); - n = n->mNext; + for (int i = 0; i < HashSize; i++) + { + SuffixTree* n = mFirst[i]; + while (n) + { + n->Print(file, map, depth + 1); + n = n->mNext; + } + } } - } void SuffixTree::ParentPrint(FILE* file, NativeCodeMapper& map) @@ -265,11 +316,17 @@ void SuffixTree::ParentCollect(NativeCodeMapper& map, NativeCodeBasicBlock* bloc void SuffixTree::ReplaceCalls(NativeCodeMapper& map, ExpandingArray& segs) { - SuffixTree* n = mFirst; - while (n) + if (mFirst) { - n->ChildReplaceCalls(map, this, 0, segs); - n = n->mNext; + for (int i = 0; i < HashSize; i++) + { + SuffixTree* n = mFirst[i]; + while (n) + { + n->ChildReplaceCalls(map, this, 0, segs); + n = n->mNext; + } + } } } @@ -283,11 +340,14 @@ void SuffixTree::ChildReplaceCalls(NativeCodeMapper& map, SuffixTree* tree, int if (mFirst) { - SuffixTree* n = mFirst; - while (n) + for (int i = 0; i < HashSize; i++) { - n->ChildReplaceCalls(map, tree, offset, segs); - n = n->mNext; + SuffixTree* n = mFirst[i]; + while (n) + { + n->ChildReplaceCalls(map, tree, offset, segs); + n = n->mNext; + } } } else diff --git a/oscar64/NativeCodeOutliner.h b/oscar64/NativeCodeOutliner.h index 000040f..7457eb9 100644 --- a/oscar64/NativeCodeOutliner.h +++ b/oscar64/NativeCodeOutliner.h @@ -41,9 +41,13 @@ public: const int * mSeg; int mSize; - SuffixTree* mNext, * mParent, * mFirst; + static const int HashSize = 32; + + SuffixTree* mNext, * mParent, ** mFirst; SuffixTree(const int* str, int s, SuffixTree* n); + ~SuffixTree(void); + void AddParents(SuffixTree* parent); void AddSuffix(const int* str, int s);