mirror of
https://github.com/beefytech/Beef.git
synced 2025-06-08 11:38:21 +02:00
Optimization phases for LLVM upgrade
This commit is contained in:
parent
72bce05103
commit
d5b0e6d06d
5 changed files with 388 additions and 525 deletions
|
@ -1,5 +1,6 @@
|
||||||
namespace System
|
namespace System
|
||||||
{
|
{
|
||||||
|
[AlwaysInclude]
|
||||||
class Delegate : IHashable
|
class Delegate : IHashable
|
||||||
{
|
{
|
||||||
void* mFuncPtr;
|
void* mFuncPtr;
|
||||||
|
@ -61,6 +62,7 @@ namespace System
|
||||||
|
|
||||||
delegate void Action();
|
delegate void Action();
|
||||||
|
|
||||||
|
[AlwaysInclude]
|
||||||
struct Function : int
|
struct Function : int
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
|
@ -3289,6 +3289,8 @@ void COFF::ParseCompileUnit_Symbols(DbgCompileUnit* compileUnit, uint8* sectionD
|
||||||
break;
|
break;
|
||||||
case 0x1179:
|
case 0x1179:
|
||||||
break;
|
break;
|
||||||
|
case 0x1180:
|
||||||
|
break;
|
||||||
case 7:
|
case 7:
|
||||||
// Unknown
|
// Unknown
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -69,6 +69,7 @@
|
||||||
#include "llvm/IR/DataLayout.h"
|
#include "llvm/IR/DataLayout.h"
|
||||||
#include "llvm/IR/Verifier.h"
|
#include "llvm/IR/Verifier.h"
|
||||||
#include "llvm/IR/LegacyPassManager.h"
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
|
#include "llvm/IR/PassManager.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/ManagedStatic.h"
|
#include "llvm/Support/ManagedStatic.h"
|
||||||
#include "llvm/Analysis/BasicAliasAnalysis.h"
|
#include "llvm/Analysis/BasicAliasAnalysis.h"
|
||||||
|
@ -92,6 +93,7 @@
|
||||||
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
|
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
|
||||||
#include "llvm/Transforms/InstCombine/InstCombine.h"
|
#include "llvm/Transforms/InstCombine/InstCombine.h"
|
||||||
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
|
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
|
||||||
|
#include "llvm/Passes/PassBuilder.h"
|
||||||
|
|
||||||
//#include "llvm/Bitcode/ReaderWriter.h"
|
//#include "llvm/Bitcode/ReaderWriter.h"
|
||||||
|
|
||||||
|
@ -5605,548 +5607,400 @@ llvm::Type* BfIRCodeGen::GetLLVMTypeById(int id)
|
||||||
return GetTypeEntry(id).mType->mLLVMType;
|
return GetTypeEntry(id).mType->mLLVMType;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int GetOptLevel(BfOptLevel optLevel)
|
// LLVM/Clang 18.1.4
|
||||||
|
static void addSanitizers(const llvm::Triple& TargetTriple, BfCodeGenOptions& CodeGenOpts, llvm::PassBuilder& PB)
|
||||||
{
|
{
|
||||||
switch (optLevel)
|
#if 0
|
||||||
|
auto SanitizersCallback = [&](llvm::ModulePassManager& MPM, llvm::OptimizationLevel Level) {
|
||||||
|
if (CodeGenOpts.hasSanitizeCoverage())
|
||||||
{
|
{
|
||||||
case BfOptLevel_O1: return 1;
|
auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
|
||||||
case BfOptLevel_O2: return 2;
|
MPM.addPass(SanitizerCoveragePass(
|
||||||
case BfOptLevel_O3: return 3;
|
SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles,
|
||||||
default: return 0;
|
CodeGenOpts.SanitizeCoverageIgnorelistFiles));
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//enum CFLAAType { None, Steensgaard, Andersen, Both };
|
|
||||||
|
|
||||||
static void AddInitialAliasAnalysisPasses(llvm::legacy::PassManagerBase &PM, const BfCodeGenOptions& options)
|
|
||||||
{
|
|
||||||
|
|
||||||
//TODO:
|
|
||||||
// switch (options.mUseCFLAA) {
|
|
||||||
// case BfCFLAAType_Steensgaard:
|
|
||||||
// PM.add(llvm::createCFLSteensAAWrapperPass());
|
|
||||||
// break;
|
|
||||||
// case BfCFLAAType_Andersen:
|
|
||||||
// PM.add(llvm::createCFLAndersAAWrapperPass());
|
|
||||||
// break;
|
|
||||||
// case BfCFLAAType_Both:
|
|
||||||
// PM.add(llvm::createCFLSteensAAWrapperPass());
|
|
||||||
// PM.add(llvm::createCFLAndersAAWrapperPass());
|
|
||||||
// break;
|
|
||||||
// default:
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
|
|
||||||
// BasicAliasAnalysis wins if they disagree. This is intended to help
|
|
||||||
// support "obvious" type-punning idioms.
|
|
||||||
PM.add(llvm::createTypeBasedAAWrapperPass());
|
|
||||||
PM.add(llvm::createScopedNoAliasAAWrapperPass());
|
|
||||||
}
|
|
||||||
|
|
||||||
static void AddInstructionCombiningPass(llvm::legacy::PassManagerBase &PM, const BfCodeGenOptions& options)
|
|
||||||
{
|
|
||||||
bool ExpensiveCombines = GetOptLevel(options.mOptLevel) > 2;
|
|
||||||
//TODO: PM.add(llvm::createInstructionCombiningPass(options.mExpensiveCombines));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void AddFunctionSimplificationPasses(llvm::legacy::PassManagerBase &MPM, const BfCodeGenOptions& options)
|
|
||||||
{
|
|
||||||
//TODO:
|
|
||||||
|
|
||||||
/*
|
|
||||||
// Start of function pass.
|
|
||||||
// Break up aggregate allocas, using SSAUpdater.
|
|
||||||
MPM.add(llvm::createSROAPass());
|
|
||||||
MPM.add(llvm::createEarlyCSEPass(options.mEnableEarlyCSEMemSSA)); // Catch trivial redundancies
|
|
||||||
//if (EnableGVNHoist)
|
|
||||||
if (options.mEnableGVNHoist)
|
|
||||||
MPM.add(llvm::createGVNHoistPass());
|
|
||||||
if (options.mEnableGVNSink)
|
|
||||||
{
|
|
||||||
MPM.add(llvm::createGVNSinkPass());
|
|
||||||
MPM.add(llvm::createCFGSimplificationPass());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Speculative execution if the target has divergent branches; otherwise nop.
|
if (CodeGenOpts.hasSanitizeBinaryMetadata()) {
|
||||||
MPM.add(llvm::createSpeculativeExecutionIfHasBranchDivergencePass());
|
MPM.addPass(SanitizerBinaryMetadataPass(
|
||||||
MPM.add(llvm::createJumpThreadingPass()); // Thread jumps.
|
getSanitizerBinaryMetadataOptions(CodeGenOpts),
|
||||||
MPM.add(llvm::createCorrelatedValuePropagationPass()); // Propagate conditionals
|
CodeGenOpts.SanitizeMetadataIgnorelistFiles));
|
||||||
MPM.add(llvm::createCFGSimplificationPass()); // Merge & remove BBs
|
}
|
||||||
// Combine silly seq's
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 2)
|
|
||||||
MPM.add(llvm::createAggressiveInstCombinerPass());
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
if (options.mSizeLevel == 0 && !options.mDisableLibCallsShrinkWrap)
|
|
||||||
MPM.add(llvm::createLibCallsShrinkWrapPass());
|
|
||||||
//AddExtensionsToPM(llvm::EP_Peephole, MPM);
|
|
||||||
|
|
||||||
// Optimize memory intrinsic calls based on the profiled size information.
|
auto MSanPass = [&](SanitizerMask Mask, bool CompileKernel) {
|
||||||
if (options.mSizeLevel == 0)
|
if (LangOpts.Sanitize.has(Mask)) {
|
||||||
MPM.add(llvm::createPGOMemOPSizeOptLegacyPass());
|
int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins;
|
||||||
|
bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
|
||||||
|
|
||||||
MPM.add(llvm::createTailCallEliminationPass()); // Eliminate tail calls
|
MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel,
|
||||||
MPM.add(llvm::createCFGSimplificationPass()); // Merge & remove BBs
|
CodeGenOpts.SanitizeMemoryParamRetval);
|
||||||
MPM.add(llvm::createReassociatePass()); // Reassociate expressions
|
MPM.addPass(MemorySanitizerPass(options));
|
||||||
|
if (Level != OptimizationLevel::O0) {
|
||||||
// Begin the loop pass pipeline.
|
// MemorySanitizer inserts complex instrumentation that mostly follows
|
||||||
if (options.mEnableSimpleLoopUnswitch) {
|
// the logic of the original code, but operates on "shadow" values. It
|
||||||
// The simple loop unswitch pass relies on separate cleanup passes. Schedule
|
// can benefit from re-running some general purpose optimization
|
||||||
// them first so when we re-process a loop they run before other loop
|
|
||||||
// passes.
|
// passes.
|
||||||
MPM.add(llvm::createLoopInstSimplifyPass());
|
MPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>());
|
||||||
MPM.add(llvm::createLoopSimplifyCFGPass());
|
FunctionPassManager FPM;
|
||||||
|
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
|
||||||
|
FPM.addPass(InstCombinePass());
|
||||||
|
FPM.addPass(JumpThreadingPass());
|
||||||
|
FPM.addPass(GVNPass());
|
||||||
|
FPM.addPass(InstCombinePass());
|
||||||
|
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
|
||||||
}
|
}
|
||||||
// Rotate Loop - disable header duplication at -Oz
|
|
||||||
MPM.add(llvm::createLoopRotatePass(options.mSizeLevel == 2 ? 0 : -1));
|
|
||||||
MPM.add(llvm::createLICMPass()); // Hoist loop invariants
|
|
||||||
if (options.mEnableSimpleLoopUnswitch)
|
|
||||||
MPM.add(llvm::createSimpleLoopUnswitchLegacyPass());
|
|
||||||
else
|
|
||||||
MPM.add(llvm::createLoopUnswitchPass(options.mSizeLevel || GetOptLevel(options.mOptLevel) < 3, options.mDivergentTarget));
|
|
||||||
// FIXME: We break the loop pass pipeline here in order to do full
|
|
||||||
// simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
|
|
||||||
// need for this.
|
|
||||||
MPM.add(llvm::createCFGSimplificationPass());
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
// We resume loop passes creating a second loop pipeline here.
|
|
||||||
MPM.add(llvm::createIndVarSimplifyPass()); // Canonicalize indvars
|
|
||||||
MPM.add(llvm::createLoopIdiomPass()); // Recognize idioms like memset.
|
|
||||||
//addExtensionsToPM(EP_LateLoopOptimizations, MPM);
|
|
||||||
MPM.add(llvm::createLoopDeletionPass()); // Delete dead loops
|
|
||||||
|
|
||||||
if (options.mEnableLoopInterchange)
|
|
||||||
MPM.add(llvm::createLoopInterchangePass()); // Interchange loops
|
|
||||||
|
|
||||||
MPM.add(llvm::createSimpleLoopUnrollPass(GetOptLevel(options.mOptLevel),
|
|
||||||
options.mDisableUnrollLoops)); // Unroll small loops
|
|
||||||
//addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
|
|
||||||
// This ends the loop pass pipelines.
|
|
||||||
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 1) {
|
|
||||||
MPM.add(llvm::createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
|
|
||||||
MPM.add(options.mNewGVN ? llvm::createNewGVNPass()
|
|
||||||
: llvm::createGVNPass(options.mDisableGVNLoadPRE)); // Remove redundancies
|
|
||||||
}
|
|
||||||
MPM.add(llvm::createMemCpyOptPass()); // Remove memcpy / form memset
|
|
||||||
MPM.add(llvm::createSCCPPass()); // Constant prop with SCCP
|
|
||||||
|
|
||||||
// Delete dead bit computations (instcombine runs after to fold away the dead
|
|
||||||
// computations, and then ADCE will run later to exploit any new DCE
|
|
||||||
// opportunities that creates).
|
|
||||||
MPM.add(llvm::createBitTrackingDCEPass()); // Delete dead bit computations
|
|
||||||
|
|
||||||
// Run instcombine after redundancy elimination to exploit opportunities
|
|
||||||
// opened up by them.
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
//addExtensionsToPM(EP_Peephole, MPM);
|
|
||||||
MPM.add(llvm::createJumpThreadingPass()); // Thread jumps
|
|
||||||
MPM.add(llvm::createCorrelatedValuePropagationPass());
|
|
||||||
MPM.add(llvm::createDeadStoreEliminationPass()); // Delete dead stores
|
|
||||||
MPM.add(llvm::createLICMPass());
|
|
||||||
|
|
||||||
//addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
|
|
||||||
|
|
||||||
if (options.mRerollLoops)
|
|
||||||
MPM.add(llvm::createLoopRerollPass());
|
|
||||||
if (!options.mRunSLPAfterLoopVectorization && options.mSLPVectorize)
|
|
||||||
MPM.add(llvm::createSLPVectorizerPass()); // Vectorize parallel scalar chains.
|
|
||||||
|
|
||||||
MPM.add(llvm::createAggressiveDCEPass()); // Delete dead instructions
|
|
||||||
MPM.add(llvm::createCFGSimplificationPass()); // Merge & remove BBs
|
|
||||||
// Clean up after everything.
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
//addExtensionsToPM(EP_Peephole, MPM);
|
|
||||||
|
|
||||||
// if (options.mEnableCHR && options.mOptLevel >= 3 &&
|
|
||||||
// (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
|
|
||||||
// MPM.add(createControlHeightReductionLegacyPass());
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
static void PopulateModulePassManager(llvm::legacy::PassManagerBase &MPM, const BfCodeGenOptions& options)
|
|
||||||
{
|
|
||||||
// if (!PGOSampleUse.empty()) {
|
|
||||||
// MPM.add(createPruneEHPass());
|
|
||||||
// MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
|
|
||||||
// }
|
|
||||||
|
|
||||||
//TODO:
|
|
||||||
/*
|
|
||||||
llvm::Pass* Inliner;
|
|
||||||
bool prepareForLTO = false;
|
|
||||||
bool prepareForThinLTO = options.mLTOType == BfLTOType_Thin;
|
|
||||||
bool performThinLTO = false;
|
|
||||||
bool enableNonLTOGlobalsModRef = false;
|
|
||||||
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 0)
|
|
||||||
Inliner = llvm::createFunctionInliningPass(GetOptLevel(options.mOptLevel), options.mSizeLevel, false);
|
|
||||||
else
|
|
||||||
Inliner = llvm::createAlwaysInlinerLegacyPass();
|
|
||||||
|
|
||||||
// Allow forcing function attributes as a debugging and tuning aid.
|
|
||||||
MPM.add(llvm::createForceFunctionAttrsLegacyPass());
|
|
||||||
|
|
||||||
// If all optimizations are disabled, just run the always-inline pass and,
|
|
||||||
// if enabled, the function merging pass.
|
|
||||||
if (GetOptLevel(options.mOptLevel) == 0) {
|
|
||||||
//addPGOInstrPasses(MPM);
|
|
||||||
if (Inliner) {
|
|
||||||
MPM.add(Inliner);
|
|
||||||
Inliner = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
|
|
||||||
// creates a CGSCC pass manager, but we don't want to add extensions into
|
|
||||||
// that pass manager. To prevent this we insert a no-op module pass to reset
|
|
||||||
// the pass manager to get the same behavior as EP_OptimizerLast in non-O0
|
|
||||||
// builds. The function merging pass is
|
|
||||||
if (options.mMergeFunctions)
|
|
||||||
MPM.add(llvm::createMergeFunctionsPass());
|
|
||||||
// else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
|
|
||||||
// MPM.add(createBarrierNoopPass());
|
|
||||||
|
|
||||||
if (performThinLTO)
|
|
||||||
{
|
|
||||||
// Drop available_externally and unreferenced globals. This is necessary
|
|
||||||
// with ThinLTO in order to avoid leaving undefined references to dead
|
|
||||||
// globals in the object file.
|
|
||||||
MPM.add(llvm::createEliminateAvailableExternallyPass());
|
|
||||||
MPM.add(llvm::createGlobalDCEPass());
|
|
||||||
}
|
|
||||||
|
|
||||||
//addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
|
|
||||||
|
|
||||||
if (prepareForLTO || prepareForThinLTO) {
|
|
||||||
MPM.add(llvm::createCanonicalizeAliasesPass());
|
|
||||||
// Rename anon globals to be able to export them in the summary.
|
|
||||||
// This has to be done after we add the extensions to the pass manager
|
|
||||||
// as there could be passes (e.g. Address sanitizer) which introduce
|
|
||||||
// new unnamed globals.
|
|
||||||
MPM.add(llvm::createNameAnonGlobalPass());
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add LibraryInfo if we have some.
|
|
||||||
// if (LibraryInfo)
|
|
||||||
// MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
|
|
||||||
|
|
||||||
AddInitialAliasAnalysisPasses(MPM, options);
|
|
||||||
|
|
||||||
// For ThinLTO there are two passes of indirect call promotion. The
|
|
||||||
// first is during the compile phase when PerformThinLTO=false and
|
|
||||||
// intra-module indirect call targets are promoted. The second is during
|
|
||||||
// the ThinLTO backend when PerformThinLTO=true, when we promote imported
|
|
||||||
// inter-module indirect calls. For that we perform indirect call promotion
|
|
||||||
// earlier in the pass pipeline, here before globalopt. Otherwise imported
|
|
||||||
// available_externally functions look unreferenced and are removed.
|
|
||||||
// if (performThinLTO)
|
|
||||||
// MPM.add(llvm::createPGOIndirectCallPromotionLegacyPass(true,
|
|
||||||
// !PGOSampleUse.empty()));
|
|
||||||
|
|
||||||
// For SamplePGO in ThinLTO compile phase, we do not want to unroll loops
|
|
||||||
// as it will change the CFG too much to make the 2nd profile annotation
|
|
||||||
// in backend more difficult.
|
|
||||||
// bool PrepareForThinLTOUsingPGOSampleProfile =
|
|
||||||
// PrepareForThinLTO && !PGOSampleUse.empty();
|
|
||||||
|
|
||||||
bool disableUnrollLoops = false;
|
|
||||||
bool prepareForThinLTOUsingPGOSampleProfile = false;
|
|
||||||
if (prepareForThinLTOUsingPGOSampleProfile)
|
|
||||||
disableUnrollLoops = true;
|
|
||||||
|
|
||||||
// Infer attributes about declarations if possible.
|
|
||||||
MPM.add(llvm::createInferFunctionAttrsLegacyPass());
|
|
||||||
|
|
||||||
//addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
|
|
||||||
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 2)
|
|
||||||
MPM.add(llvm::createCallSiteSplittingPass());
|
|
||||||
|
|
||||||
MPM.add(llvm::createIPSCCPPass()); // IP SCCP
|
|
||||||
MPM.add(llvm::createCalledValuePropagationPass());
|
|
||||||
MPM.add(llvm::createGlobalOptimizerPass()); // Optimize out global vars
|
|
||||||
// Promote any localized global vars.
|
|
||||||
MPM.add(llvm::createPromoteMemoryToRegisterPass());
|
|
||||||
|
|
||||||
MPM.add(llvm::createDeadArgEliminationPass()); // Dead argument elimination
|
|
||||||
|
|
||||||
AddInstructionCombiningPass(MPM, options); // Clean up after IPCP & DAE
|
|
||||||
//addExtensionsToPM(EP_Peephole, MPM);
|
|
||||||
MPM.add(llvm::createCFGSimplificationPass()); // Clean up after IPCP & DAE
|
|
||||||
|
|
||||||
// For SamplePGO in ThinLTO compile phase, we do not want to do indirect
|
|
||||||
// call promotion as it will change the CFG too much to make the 2nd
|
|
||||||
// profile annotation in backend more difficult.
|
|
||||||
// PGO instrumentation is added during the compile phase for ThinLTO, do
|
|
||||||
// not run it a second time
|
|
||||||
// if (!performThinLTO && !prepareForThinLTOUsingPGOSampleProfile)
|
|
||||||
// llvm::addPGOInstrPasses(MPM);
|
|
||||||
|
|
||||||
// We add a module alias analysis pass here. In part due to bugs in the
|
|
||||||
// analysis infrastructure this "works" in that the analysis stays alive
|
|
||||||
// for the entire SCC pass run below.
|
|
||||||
MPM.add(llvm::createGlobalsAAWrapperPass());
|
|
||||||
|
|
||||||
// Start of CallGraph SCC passes.
|
|
||||||
MPM.add(llvm::createPruneEHPass()); // Remove dead EH info
|
|
||||||
bool RunInliner = false;
|
|
||||||
if (Inliner) {
|
|
||||||
MPM.add(Inliner);
|
|
||||||
Inliner = nullptr;
|
|
||||||
RunInliner = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
MPM.add(llvm::createPostOrderFunctionAttrsLegacyPass());
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 2)
|
|
||||||
MPM.add(llvm::createArgumentPromotionPass()); // Scalarize uninlined fn args
|
|
||||||
|
|
||||||
//addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
|
|
||||||
AddFunctionSimplificationPasses(MPM, options);
|
|
||||||
|
|
||||||
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
|
|
||||||
// pass manager that we are specifically trying to avoid. To prevent this
|
|
||||||
// we must insert a no-op module pass to reset the pass manager.
|
|
||||||
MPM.add(llvm::createBarrierNoopPass());
|
|
||||||
|
|
||||||
if (options.mRunPartialInlining)
|
|
||||||
MPM.add(llvm::createPartialInliningPass());
|
|
||||||
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 1 && !prepareForLTO && !prepareForThinLTO)
|
|
||||||
// Remove avail extern fns and globals definitions if we aren't
|
|
||||||
// compiling an object file for later LTO. For LTO we want to preserve
|
|
||||||
// these so they are eligible for inlining at link-time. Note if they
|
|
||||||
// are unreferenced they will be removed by GlobalDCE later, so
|
|
||||||
// this only impacts referenced available externally globals.
|
|
||||||
// Eventually they will be suppressed during codegen, but eliminating
|
|
||||||
// here enables more opportunity for GlobalDCE as it may make
|
|
||||||
// globals referenced by available external functions dead
|
|
||||||
// and saves running remaining passes on the eliminated functions.
|
|
||||||
MPM.add(llvm::createEliminateAvailableExternallyPass());
|
|
||||||
|
|
||||||
MPM.add(llvm::createReversePostOrderFunctionAttrsPass());
|
|
||||||
|
|
||||||
// The inliner performs some kind of dead code elimination as it goes,
|
|
||||||
// but there are cases that are not really caught by it. We might
|
|
||||||
// at some point consider teaching the inliner about them, but it
|
|
||||||
// is OK for now to run GlobalOpt + GlobalDCE in tandem as their
|
|
||||||
// benefits generally outweigh the cost, making the whole pipeline
|
|
||||||
// faster.
|
|
||||||
if (RunInliner) {
|
|
||||||
MPM.add(llvm::createGlobalOptimizerPass());
|
|
||||||
MPM.add(llvm::createGlobalDCEPass());
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we are planning to perform ThinLTO later, let's not bloat the code with
|
|
||||||
// unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
|
|
||||||
// during ThinLTO and perform the rest of the optimizations afterward.
|
|
||||||
if (prepareForThinLTO) {
|
|
||||||
// Ensure we perform any last passes, but do so before renaming anonymous
|
|
||||||
// globals in case the passes add any.
|
|
||||||
//addExtensionsToPM(EP_OptimizerLast, MPM);
|
|
||||||
MPM.add(llvm::createCanonicalizeAliasesPass());
|
|
||||||
// Rename anon globals to be able to export them in the summary.
|
|
||||||
MPM.add(llvm::createNameAnonGlobalPass());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (performThinLTO)
|
|
||||||
// Optimize globals now when performing ThinLTO, this enables more
|
|
||||||
// optimizations later.
|
|
||||||
MPM.add(llvm::createGlobalOptimizerPass());
|
|
||||||
|
|
||||||
// Scheduling LoopVersioningLICM when inlining is over, because after that
|
|
||||||
// we may see more accurate aliasing. Reason to run this late is that too
|
|
||||||
// early versioning may prevent further inlining due to increase of code
|
|
||||||
// size. By placing it just after inlining other optimizations which run
|
|
||||||
// later might get benefit of no-alias assumption in clone loop.
|
|
||||||
if (options.mUseLoopVersioningLICM) {
|
|
||||||
MPM.add(llvm::createLoopVersioningLICMPass()); // Do LoopVersioningLICM
|
|
||||||
MPM.add(llvm::createLICMPass()); // Hoist loop invariants
|
|
||||||
}
|
|
||||||
|
|
||||||
// We add a fresh GlobalsModRef run at this point. This is particularly
|
|
||||||
// useful as the above will have inlined, DCE'ed, and function-attr
|
|
||||||
// propagated everything. We should at this point have a reasonably minimal
|
|
||||||
// and richly annotated call graph. By computing aliasing and mod/ref
|
|
||||||
// information for all local globals here, the late loop passes and notably
|
|
||||||
// the vectorizer will be able to use them to help recognize vectorizable
|
|
||||||
// memory operations.
|
|
||||||
//
|
|
||||||
// Note that this relies on a bug in the pass manager which preserves
|
|
||||||
// a module analysis into a function pass pipeline (and throughout it) so
|
|
||||||
// long as the first function pass doesn't invalidate the module analysis.
|
|
||||||
// Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
|
|
||||||
// this to work. Fortunately, it is trivial to preserve AliasAnalysis
|
|
||||||
// (doing nothing preserves it as it is required to be conservatively
|
|
||||||
// correct in the face of IR changes).
|
|
||||||
MPM.add(llvm::createGlobalsAAWrapperPass());
|
|
||||||
|
|
||||||
MPM.add(llvm::createFloat2IntPass());
|
|
||||||
|
|
||||||
//addExtensionsToPM(EP_VectorizerStart, MPM);
|
|
||||||
|
|
||||||
// Re-rotate loops in all our loop nests. These may have fallen out of
|
|
||||||
// rotated form due to GVN or other transformations, and the vectorizer relies
|
|
||||||
// on the rotated form. Disable header duplication at -Oz.
|
|
||||||
MPM.add(llvm::createLoopRotatePass(options.mSizeLevel == 2 ? 0 : -1));
|
|
||||||
|
|
||||||
// Distribute loops to allow partial vectorization. I.e. isolate dependences
|
|
||||||
// into separate loop that would otherwise inhibit vectorization. This is
|
|
||||||
// currently only performed for loops marked with the metadata
|
|
||||||
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
|
|
||||||
MPM.add(llvm::createLoopDistributePass());
|
|
||||||
|
|
||||||
MPM.add(llvm::createLoopVectorizePass(options.mDisableUnrollLoops, !options.mLoopVectorize));
|
|
||||||
|
|
||||||
// Eliminate loads by forwarding stores from the previous iteration to loads
|
|
||||||
// of the current iteration.
|
|
||||||
MPM.add(llvm::createLoopLoadEliminationPass());
|
|
||||||
|
|
||||||
// FIXME: Because of #pragma vectorize enable, the passes below are always
|
|
||||||
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
|
|
||||||
// on -O1 and no #pragma is found). Would be good to have these two passes
|
|
||||||
// as function calls, so that we can only pass them when the vectorizer
|
|
||||||
// changed the code.
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 1 && options.mExtraVectorizerPasses) {
|
|
||||||
// At higher optimization levels, try to clean up any runtime overlap and
|
|
||||||
// alignment checks inserted by the vectorizer. We want to track correlated
|
|
||||||
// runtime checks for two inner loops in the same outer loop, fold any
|
|
||||||
// common computations, hoist loop-invariant aspects out of any outer loop,
|
|
||||||
// and unswitch the runtime checks if possible. Once hoisted, we may have
|
|
||||||
// dead (or speculatable) control flows or more combining opportunities.
|
|
||||||
MPM.add(llvm::createEarlyCSEPass());
|
|
||||||
MPM.add(llvm::createCorrelatedValuePropagationPass());
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
MPM.add(llvm::createLICMPass());
|
|
||||||
MPM.add(llvm::createLoopUnswitchPass(options.mSizeLevel || GetOptLevel(options.mOptLevel) < 3, options.mDivergentTarget));
|
|
||||||
MPM.add(llvm::createCFGSimplificationPass());
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
|
|
||||||
// GVN, loop transforms, and others have already run, so it's now better to
|
|
||||||
// convert to more optimized IR using more aggressive simplify CFG options.
|
|
||||||
// The extra sinking transform can create larger basic blocks, so do this
|
|
||||||
// before SLP vectorization.
|
|
||||||
MPM.add(llvm::createCFGSimplificationPass({ 1, true, true, false, false, true }));
|
|
||||||
|
|
||||||
if (options.mRunSLPAfterLoopVectorization && options.mSLPVectorize) {
|
|
||||||
MPM.add(llvm::createSLPVectorizerPass()); // Vectorize parallel scalar chains.
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 1 && options.mExtraVectorizerPasses) {
|
|
||||||
MPM.add(llvm::createEarlyCSEPass());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//addExtensionsToPM(EP_Peephole, MPM);
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
|
|
||||||
if (options.mEnableUnrollAndJam && !disableUnrollLoops) {
|
|
||||||
// Unroll and Jam. We do this before unroll but need to be in a separate
|
|
||||||
// loop pass manager in order for the outer loop to be processed by
|
|
||||||
// unroll and jam before the inner loop is unrolled.
|
|
||||||
MPM.add(llvm::createLoopUnrollAndJamPass(GetOptLevel(options.mOptLevel)));
|
|
||||||
}
|
|
||||||
|
|
||||||
MPM.add(llvm::createLoopUnrollPass(GetOptLevel(options.mOptLevel),
|
|
||||||
disableUnrollLoops)); // Unroll small loops
|
|
||||||
|
|
||||||
if (!disableUnrollLoops) {
|
|
||||||
// LoopUnroll may generate some redundancy to clean up.
|
|
||||||
AddInstructionCombiningPass(MPM, options);
|
|
||||||
|
|
||||||
// Runtime unrolling will introduce runtime check in loop prologue. If the
|
|
||||||
// unrolled loop is an inner loop, then the prologue will be inside the
|
|
||||||
// outer loop. LICM pass can help to promote the runtime check out if the
|
|
||||||
// checked value is loop invariant.
|
|
||||||
MPM.add(llvm::createLICMPass());
|
|
||||||
}
|
|
||||||
|
|
||||||
MPM.add(llvm::createWarnMissedTransformationsPass());
|
|
||||||
|
|
||||||
// After vectorization and unrolling, assume intrinsics may tell us more
|
|
||||||
// about pointer alignments.
|
|
||||||
MPM.add(llvm::createAlignmentFromAssumptionsPass());
|
|
||||||
|
|
||||||
// FIXME: We shouldn't bother with this anymore.
|
|
||||||
MPM.add(llvm::createStripDeadPrototypesPass()); // Get rid of dead prototypes
|
|
||||||
|
|
||||||
// GlobalOpt already deletes dead functions and globals, at -O2 try a
|
|
||||||
// late pass of GlobalDCE. It is capable of deleting dead cycles.
|
|
||||||
if (GetOptLevel(options.mOptLevel) > 1) {
|
|
||||||
MPM.add(llvm::createGlobalDCEPass()); // Remove dead fns and globals.
|
|
||||||
MPM.add(llvm::createConstantMergePass()); // Merge dup global constants
|
|
||||||
}
|
|
||||||
|
|
||||||
if (options.mMergeFunctions)
|
|
||||||
MPM.add(llvm::createMergeFunctionsPass());
|
|
||||||
|
|
||||||
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
|
|
||||||
// canonicalization pass that enables other optimizations. As a result,
|
|
||||||
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
|
|
||||||
// result too early.
|
|
||||||
MPM.add(llvm::createLoopSinkPass());
|
|
||||||
// Get rid of LCSSA nodes.
|
|
||||||
MPM.add(llvm::createInstSimplifyLegacyPass());
|
|
||||||
|
|
||||||
// This hoists/decomposes div/rem ops. It should run after other sink/hoist
|
|
||||||
// passes to avoid re-sinking, but before SimplifyCFG because it can allow
|
|
||||||
// flattening of blocks.
|
|
||||||
MPM.add(llvm::createDivRemPairsPass());
|
|
||||||
|
|
||||||
if (options.mEnableHotColdSplit)
|
|
||||||
MPM.add(llvm::createHotColdSplittingPass());
|
|
||||||
|
|
||||||
// LoopSink (and other loop passes since the last simplifyCFG) might have
|
|
||||||
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
|
|
||||||
MPM.add(llvm::createCFGSimplificationPass());
|
|
||||||
|
|
||||||
//addExtensionsToPM(EP_OptimizerLast, MPM);
|
|
||||||
|
|
||||||
if (prepareForLTO) {
|
|
||||||
MPM.add(llvm::createCanonicalizeAliasesPass());
|
|
||||||
// Rename anon globals to be able to handle them in the summary
|
|
||||||
MPM.add(llvm::createNameAnonGlobalPass());
|
|
||||||
}*/
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
struct BfPass : public llvm::MachineFunctionPass
|
|
||||||
{
|
|
||||||
static char ID;
|
|
||||||
BfPass() : llvm::MachineFunctionPass(ID) {}
|
|
||||||
|
|
||||||
bool runOnMachineFunction(llvm::MachineFunction &F) override
|
|
||||||
{
|
|
||||||
//errs() << "Hello: ";
|
|
||||||
//errs().write_escaped(F.getName()) << '\n';
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
MSanPass(SanitizerKind::Memory, false);
|
||||||
|
MSanPass(SanitizerKind::KernelMemory, true);
|
||||||
|
|
||||||
char BfPass::ID = 0;
|
if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
|
||||||
static llvm::RegisterPass<BfPass> sBfPass("BfPass", "Beef Pass", false, false);
|
MPM.addPass(ModuleThreadSanitizerPass());
|
||||||
|
MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
|
||||||
llvm::Expected<llvm::BitcodeModule> FindThinLTOModule(llvm::MemoryBufferRef MBRef)
|
|
||||||
{
|
|
||||||
llvm::Expected<std::vector<llvm::BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef);
|
|
||||||
if (!BMsOrErr)
|
|
||||||
return BMsOrErr.takeError();
|
|
||||||
|
|
||||||
// The bitcode file may contain multiple modules, we want the one that is
|
|
||||||
// marked as being the ThinLTO module.
|
|
||||||
for (llvm::BitcodeModule &BM : *BMsOrErr) {
|
|
||||||
llvm::Expected<llvm::BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
|
|
||||||
if (LTOInfo && LTOInfo->IsThinLTO)
|
|
||||||
return BM;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return llvm::make_error<llvm::StringError>("Could not find module summary",
|
auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) {
|
||||||
llvm::inconvertibleErrorCode());
|
if (LangOpts.Sanitize.has(Mask)) {
|
||||||
|
bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts);
|
||||||
|
bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator;
|
||||||
|
llvm::AsanDtorKind DestructorKind =
|
||||||
|
CodeGenOpts.getSanitizeAddressDtor();
|
||||||
|
AddressSanitizerOptions Opts;
|
||||||
|
Opts.CompileKernel = CompileKernel;
|
||||||
|
Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
|
||||||
|
Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
|
||||||
|
Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
|
||||||
|
MPM.addPass(AddressSanitizerPass(Opts, UseGlobalGC, UseOdrIndicator,
|
||||||
|
DestructorKind));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
ASanPass(SanitizerKind::Address, false);
|
||||||
|
ASanPass(SanitizerKind::KernelAddress, true);
|
||||||
|
|
||||||
|
auto HWASanPass = [&](SanitizerMask Mask, bool CompileKernel) {
|
||||||
|
if (LangOpts.Sanitize.has(Mask)) {
|
||||||
|
bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
|
||||||
|
MPM.addPass(HWAddressSanitizerPass(
|
||||||
|
{ CompileKernel, Recover,
|
||||||
|
/*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0 }));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
HWASanPass(SanitizerKind::HWAddress, false);
|
||||||
|
HWASanPass(SanitizerKind::KernelHWAddress, true);
|
||||||
|
|
||||||
|
if (LangOpts.Sanitize.has(SanitizerKind::DataFlow)) {
|
||||||
|
MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if (ClSanitizeOnOptimizerEarlyEP) {
|
||||||
|
PB.registerOptimizerEarlyEPCallback(
|
||||||
|
[SanitizersCallback](ModulePassManager& MPM, OptimizationLevel Level) {
|
||||||
|
ModulePassManager NewMPM;
|
||||||
|
SanitizersCallback(NewMPM, Level);
|
||||||
|
if (!NewMPM.isEmpty()) {
|
||||||
|
// Sanitizers can abandon<GlobalsAA>.
|
||||||
|
NewMPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>());
|
||||||
|
MPM.addPass(std::move(NewMPM));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// LastEP does not need GlobalsAA.
|
||||||
|
PB.registerOptimizerLastEPCallback(SanitizersCallback);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// LLVM/Clang 18.1.4
|
||||||
|
static void addKCFIPass(const llvm::Triple& TargetTriple, const BfCodeGenOptions& codeGenOpts, llvm::PassBuilder& PB)
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
// If the back-end supports KCFI operand bundle lowering, skip KCFIPass.
|
||||||
|
if (TargetTriple.getArch() == llvm::Triple::x86_64 ||
|
||||||
|
TargetTriple.isAArch64(64) || TargetTriple.isRISCV())
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Ensure we lower KCFI operand bundles with -O0.
|
||||||
|
PB.registerOptimizerLastEPCallback(
|
||||||
|
[&](ModulePassManager& MPM, OptimizationLevel Level) {
|
||||||
|
if (Level == OptimizationLevel::O0 &&
|
||||||
|
LangOpts.Sanitize.has(SanitizerKind::KCFI))
|
||||||
|
MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass()));
|
||||||
|
});
|
||||||
|
|
||||||
|
// When optimizations are requested, run KCIFPass after InstCombine to
|
||||||
|
// avoid unnecessary checks.
|
||||||
|
PB.registerPeepholeEPCallback(
|
||||||
|
[&](FunctionPassManager& FPM, OptimizationLevel Level) {
|
||||||
|
if (Level != OptimizationLevel::O0 &&
|
||||||
|
LangOpts.Sanitize.has(SanitizerKind::KCFI))
|
||||||
|
FPM.addPass(KCFIPass());
|
||||||
|
});
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether we should emit a module summary for regular LTO.
|
||||||
|
/// The module summary should be emitted by default for regular LTO
|
||||||
|
/// except for ld64 targets.
|
||||||
|
///
|
||||||
|
/// \return True if the module summary should be emitted.
|
||||||
|
static bool shouldEmitRegularLTOSummary(const llvm::Triple& targetTriple, const BfCodeGenOptions& codeGenOptions, bool PrepareForLTO)
|
||||||
|
{
|
||||||
|
return PrepareForLTO /*&& !CodeGenOpts.DisableLLVMPasses*/ &&
|
||||||
|
targetTriple.getVendor() != llvm::Triple::Apple;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check whether we should emit a flag for UnifiedLTO.
|
||||||
|
/// The UnifiedLTO module flag should be set when UnifiedLTO is enabled for
|
||||||
|
/// ThinLTO or Full LTO with module summaries.
|
||||||
|
static bool shouldEmitUnifiedLTOModueFlag(const llvm::Triple& targetTriple, const BfCodeGenOptions& codeGenOptions, bool PrepareForLTO)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
/*return CodeGenOpts.UnifiedLTO &&
|
||||||
|
(CodeGenOpts.PrepareForThinLTO || shouldEmitRegularLTOSummary());*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the new-pass-manager optimization pipeline for the current module
/// and run it on `mLLVMModule`.
///
/// Steps, in order:
///  1. Clear PGO options on the target machine and fill in the pipeline
///     tuning options from `mCodeGenOptions`.
///  2. Create the analysis managers and a PassBuilder, and register and
///     cross-register the standard analyses.
///  3. Add a verifier pass, then the default pipeline selected by the LTO
///     type (fat / thin pre-link / LTO pre-link / per-module) at the
///     optimization level mapped from `mCodeGenOptions.mOptLevel`.
///  4. Run the resulting module pass manager.
///
/// Large `#if 0` / commented-out regions are retained as porting references;
/// they are not compiled.
///
/// \param targetTriple Target triple, forwarded to the sanitizer/KCFI
///                     registration helpers and the LTO summary check.
void BfIRCodeGen::RunOptimizationPipeline(const llvm::Triple& targetTriple)
{
	// The module is always verified before any other pass runs.
	const bool verifyModule = true;

	// No PGO options are supplied; hand the empty optional to the target machine.
	std::optional<llvm::PGOOptions> pgoOptions;
	mLLVMTargetMachine->setPGOOption(pgoOptions);

	llvm::PipelineTuningOptions tuningOpts;
	tuningOpts.LoopUnrolling = !mCodeGenOptions.mDisableUnrollLoops;
	// For historical reasons, loop interleaving is set to mirror setting for loop unrolling.
	tuningOpts.LoopInterleaving = !mCodeGenOptions.mDisableUnrollLoops;
	tuningOpts.LoopVectorization = mCodeGenOptions.mLoopVectorize;
	tuningOpts.SLPVectorization = mCodeGenOptions.mSLPVectorize;
	tuningOpts.MergeFunctions = mCodeGenOptions.mMergeFunctions;
	//TODO:
	//tuningOpts.CallGraphProfile = ???
	//tuningOpts.UnifiedLTO = ???

	// Declaration order of the analysis managers is kept as in the original.
	llvm::LoopAnalysisManager loopAM;
	llvm::FunctionAnalysisManager functionAM;
	llvm::CGSCCAnalysisManager cgsccAM;
	llvm::ModuleAnalysisManager moduleAM;

	llvm::PassInstrumentationCallbacks instrCallbacks;
	// 	PrintPassOptions PrintPassOpts;
	// 	PrintPassOpts.Indent = DebugPassStructure;
	// 	PrintPassOpts.SkipAnalyses = DebugPassStructure;
	// 	StandardInstrumentations SI(
	// 		TheModule->getContext(),
	// 		(CodeGenOpts.DebugPassManager || DebugPassStructure),
	// 		CodeGenOpts.VerifyEach, PrintPassOpts);
	// 	SI.registerCallbacks(PIC, &MAM);
	llvm::PassBuilder passBuilder(mLLVMTargetMachine, tuningOpts, pgoOptions, &instrCallbacks);

	// Register all the basic analyses with the managers.
	passBuilder.registerModuleAnalyses(moduleAM);
	passBuilder.registerCGSCCAnalyses(cgsccAM);
	passBuilder.registerFunctionAnalyses(functionAM);
	passBuilder.registerLoopAnalyses(loopAM);
	passBuilder.crossRegisterProxies(loopAM, functionAM, cgsccAM, moduleAM);

	// Add a verifier pass, before any other passes, to catch CodeGen issues.
	llvm::ModulePassManager modulePM;
	if (verifyModule)
		modulePM.addPass(llvm::VerifierPass());

	// Mirrors Clang's DisableLLVMPasses option; never set here.
	const bool disableLLVMPasses = false;
	if (!disableLLVMPasses)
	{
		const bool prepareForLTO = false;
		const bool prepareForThinLTO = (mCodeGenOptions.mLTOType == BfLTOType_Thin);
		//bool performThinLTO = false;

		// Map the Beef optimization level onto an LLVM level. O0 and any
		// value not listed below use O0.
		llvm::OptimizationLevel optLevel = llvm::OptimizationLevel::O0;
		switch (mCodeGenOptions.mOptLevel)
		{
		case BfOptLevel_O1:
		case BfOptLevel_Og: // Og shares the O1 pipeline
			optLevel = llvm::OptimizationLevel::O1;
			break;
		case BfOptLevel_O2:
			optLevel = llvm::OptimizationLevel::O2;
			break;
		case BfOptLevel_O3:
			optLevel = llvm::OptimizationLevel::O3;
			break;
		default:
			break;
		}

		const bool isThinLTOPostLink = false;

#if 0
		// If we reached here with a non-empty index file name, then the index
		// file was empty and we are not performing ThinLTO backend compilation
		// (used in testing in a distributed build environment).

		bool IsThinLTOPostLink = !CodeGenOpts.ThinLTOIndexFile.empty();
		// If so drop any the type test assume sequences inserted for whole program
		// vtables so that codegen doesn't complain.
		if (IsThinLTOPostLink)
			PB.registerPipelineStartEPCallback(
				[](ModulePassManager& MPM, OptimizationLevel Level) {
					MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr,
						/*ImportSummary=*/nullptr,
						/*DropTypeTests=*/true));
				});

		// Register callbacks to schedule sanitizer passes at the appropriate part
		// of the pipeline.
		if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
			PB.registerScalarOptimizerLateEPCallback(
				[](FunctionPassManager& FPM, OptimizationLevel Level) {
					FPM.addPass(BoundsCheckingPass());
				});
#endif

		// Don't add sanitizers if we are here from ThinLTO PostLink. That is
		// already done in the PreLink stage.
		if (!isThinLTOPostLink)
		{
			addSanitizers(targetTriple, mCodeGenOptions, passBuilder);
			addKCFIPass(targetTriple, mCodeGenOptions, passBuilder);
		}

#if 0
		if (std::optional<GCOVOptions> Options =
			getGCOVOptions(CodeGenOpts, LangOpts))
			PB.registerPipelineStartEPCallback(
				[Options](ModulePassManager& MPM, OptimizationLevel Level) {
					MPM.addPass(GCOVProfilerPass(*Options));
				});
		if (std::optional<InstrProfOptions> Options =
			getInstrProfOptions(CodeGenOpts, LangOpts))
			PB.registerPipelineStartEPCallback(
				[Options](ModulePassManager& MPM, OptimizationLevel Level) {
					MPM.addPass(InstrProfilingLoweringPass(*Options, false));
				});

		// TODO: Consider passing the MemoryProfileOutput to the pass builder via
		// the PGOOptions, and set this up there.
		if (!CodeGenOpts.MemoryProfileOutput.empty()) {
			PB.registerOptimizerLastEPCallback(
				[](ModulePassManager& MPM, OptimizationLevel Level) {
					MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
					MPM.addPass(ModuleMemProfilerPass());
				});
		}
#endif

		// Pick the default pipeline that matches the requested LTO mode.
		if (mCodeGenOptions.mLTOType == BfLTOType_Fat)
		{
			const bool emitSummary =
				prepareForThinLTO || shouldEmitRegularLTOSummary(targetTriple, mCodeGenOptions, prepareForLTO);
			modulePM.addPass(passBuilder.buildFatLTODefaultPipeline(optLevel, prepareForThinLTO, emitSummary));
		}
		else if (prepareForThinLTO)
		{
			modulePM.addPass(passBuilder.buildThinLTOPreLinkDefaultPipeline(optLevel));
		}
		else if (prepareForLTO)
		{
			modulePM.addPass(passBuilder.buildLTOPreLinkDefaultPipeline(optLevel));
		}
		else
		{
			modulePM.addPass(passBuilder.buildPerModuleDefaultPipeline(optLevel));
		}
	}

	// Re-link against any bitcodes supplied via the -mlink-builtin-bitcode option
	// Some optimizations may generate new function calls that would not have
	// been linked pre-optimization (i.e. fused sincos calls generated by
	// AMDGPULibCalls::fold_sincos.)
	//TODO:
	// 	if (ClRelinkBuiltinBitcodePostop)
	// 		MPM.addPass(LinkInModulesPass(BC, false));

	// Add a verifier pass if requested. We don't have to do this if the action
	// requires code generation because there will already be a verifier pass in
	// the code-generation pipeline.
	// Since we already added a verifier pass above, this
	// might even not run the analysis, if previous passes caused no changes.
	// 	if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule)
	// 		MPM.addPass(VerifierPass());

	//TODO:
#if 0
	if (Action == Backend_EmitBC || Action == Backend_EmitLL || CodeGenOpts.FatLTO)
	{
		if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
			if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
				TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
					CodeGenOpts.EnableSplitLTOUnit);
			if (Action == Backend_EmitBC) {
				if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
					ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
					if (!ThinLinkOS)
						return;
				}
				MPM.addPass(ThinLTOBitcodeWriterPass(
					*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
			}
			else if (Action == Backend_EmitLL) {
				MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
					/*EmitLTOSummary=*/true));
			}
		}
		else {
			// Emit a module summary by default for Regular LTO except for ld64
			// targets
			bool EmitLTOSummary = shouldEmitRegularLTOSummary();
			if (EmitLTOSummary) {
				if (!TheModule->getModuleFlag("ThinLTO") && !CodeGenOpts.UnifiedLTO)
					TheModule->addModuleFlag(llvm::Module::Error, "ThinLTO", uint32_t(0));
				if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
					TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
						uint32_t(1));
			}
			if (Action == Backend_EmitBC) {
				MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
					EmitLTOSummary));
			}
			else if (Action == Backend_EmitLL) {
				MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
					EmitLTOSummary));
			}
		}

		if (shouldEmitUnifiedLTOModueFlag())
			TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
	}
#endif

	// Print a textual, '-passes=' compatible, representation of pipeline if
	// requested.
	// 	if (PrintPipelinePasses) {
	// 		MPM.printPipeline(outs(), [&PIC](StringRef ClassName) {
	// 			auto PassName = PIC.getPassNameForClassName(ClassName);
	// 			return PassName.empty() ? ClassName : PassName;
	// 			});
	// 		outs() << "\n";
	// 		return;
	// 	}
	//
	// 	if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
	// 		LangOpts.HIPStdParInterposeAlloc)
	// 		MPM.addPass(HipStdParAllocationInterpositionPass());

	// Now that we have all of the passes ready, run them.
	{
		//PrettyStackTraceString CrashInfo("Optimizer");
		llvm::TimeTraceScope optimizerTimeScope("Optimizer");
		modulePM.run(*mLLVMModule, moduleAM);
	}
}
|
||||||
|
|
||||||
bool BfIRCodeGen::WriteObjectFile(const StringImpl& outFileName)
|
bool BfIRCodeGen::WriteObjectFile(const StringImpl& outFileName)
|
||||||
|
@ -6183,6 +6037,7 @@ bool BfIRCodeGen::WriteObjectFile(const StringImpl& outFileName)
|
||||||
if (EC)
|
if (EC)
|
||||||
return false;
|
return false;
|
||||||
// Build up all of the passes that we want to do to the module.
|
// Build up all of the passes that we want to do to the module.
|
||||||
|
//llvm::legacy::PassManager PM;
|
||||||
llvm::legacy::PassManager PM;
|
llvm::legacy::PassManager PM;
|
||||||
|
|
||||||
llvm::Triple theTriple = llvm::Triple(mLLVMModule->getTargetTriple());
|
llvm::Triple theTriple = llvm::Triple(mLLVMModule->getTargetTriple());
|
||||||
|
@ -6193,7 +6048,7 @@ bool BfIRCodeGen::WriteObjectFile(const StringImpl& outFileName)
|
||||||
|
|
||||||
// Add the target data from the target machine, if it exists, or the module.
|
// Add the target data from the target machine, if it exists, or the module.
|
||||||
//PM.add(new DataLayoutPass());
|
//PM.add(new DataLayoutPass());
|
||||||
PopulateModulePassManager(PM, mCodeGenOptions);
|
RunOptimizationPipeline(theTriple);
|
||||||
|
|
||||||
llvm::raw_fd_ostream* outStream = NULL;
|
llvm::raw_fd_ostream* outStream = NULL;
|
||||||
defer ( delete outStream; );
|
defer ( delete outStream; );
|
||||||
|
@ -6401,6 +6256,6 @@ void BfIRCodeGen::StaticInit()
|
||||||
LLVMInitializeWebAssemblyTarget();
|
LLVMInitializeWebAssemblyTarget();
|
||||||
LLVMInitializeWebAssemblyTargetMC();
|
LLVMInitializeWebAssemblyTargetMC();
|
||||||
LLVMInitializeWebAssemblyAsmPrinter();
|
LLVMInitializeWebAssemblyAsmPrinter();
|
||||||
LLVMInitializeWebAssemblyAsmParser();
|
//LLVMInitializeWebAssemblyAsmParser();
|
||||||
LLVMInitializeWebAssemblyDisassembler();
|
LLVMInitializeWebAssemblyDisassembler();
|
||||||
}
|
}
|
|
@ -21,6 +21,7 @@ namespace llvm
|
||||||
class Module;
|
class Module;
|
||||||
class LLVMContext;
|
class LLVMContext;
|
||||||
class TargetMachine;
|
class TargetMachine;
|
||||||
|
class Triple;
|
||||||
};
|
};
|
||||||
|
|
||||||
NS_BF_BEGIN
|
NS_BF_BEGIN
|
||||||
|
@ -208,6 +209,8 @@ public:
|
||||||
BfIRTypedValue GetAlignedPtr(const BfIRTypedValue& val);
|
BfIRTypedValue GetAlignedPtr(const BfIRTypedValue& val);
|
||||||
llvm::Value* DoCheckedIntrinsic(llvm::Intrinsic::ID intrin, llvm::Value* lhs, llvm::Value* rhs, bool useAsm);
|
llvm::Value* DoCheckedIntrinsic(llvm::Intrinsic::ID intrin, llvm::Value* lhs, llvm::Value* rhs, bool useAsm);
|
||||||
|
|
||||||
|
void RunOptimizationPipeline(const llvm::Triple& targetTriple);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BfIRCodeGen();
|
BfIRCodeGen();
|
||||||
~BfIRCodeGen();
|
~BfIRCodeGen();
|
||||||
|
|
|
@ -357,7 +357,8 @@ enum BfOptLevel
|
||||||
enum BfLTOType
|
enum BfLTOType
|
||||||
{
|
{
|
||||||
BfLTOType_None = 0,
|
BfLTOType_None = 0,
|
||||||
BfLTOType_Thin = 1
|
BfLTOType_Thin = 1,
|
||||||
|
BfLTOType_Fat = 2
|
||||||
};
|
};
|
||||||
|
|
||||||
enum BfCFLAAType
|
enum BfCFLAAType
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue