From 8451454bdb09e291f05da0f0f9a8b2ee29860219 Mon Sep 17 00:00:00 2001
From: Konrad Kleine <kkleine@redhat.com>
Date: Thu, 7 Nov 2024 11:20:13 +0100
Subject: [PATCH] Port changes from RHEL 9

This change brings back some patches we had applied in LLVM 18. Since
the `bundle_compat_lib` switch in RHEL still builds LLVM 18, I've added
them here as well. This was easily possible due to #323.

This effectively allows us to build LLVM 19 in RHEL9 (see also RHEL-57461).

I've also added `--gcc-install-dir` to the config file that is used
once clang is installed. This tells clang on RHEL which standard
library to link against.

We decided to no longer patch clang to default to DWARF4. Instead we tune
the default by adding `-gdwarf-4` to the config file.

On the RHEL side, we've bumped the gts (gcc-toolset) version from 13 to
14 (see RHEL-38228).
---
 ...oolset-path-precedence-over-Installe.patch |  41 +
 18-99273.patch                                | 893 ++++++++++++++++++
 llvm.spec                                     |  45 +-
 3 files changed, 973 insertions(+), 6 deletions(-)
 create mode 100644 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
 create mode 100644 18-99273.patch

diff --git a/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
new file mode 100644
index 0000000..92ab81c
--- /dev/null
+++ b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
@@ -0,0 +1,41 @@
+From 73d3b4047d757ef35850e2cef38285b96be82f0f Mon Sep 17 00:00:00 2001
+From: Nikita Popov <npopov@redhat.com>
+Date: Tue, 23 May 2023 12:17:29 +0200
+Subject: [PATCH] [Driver] Give devtoolset path precedence over InstalledDir
+
+This is a followup to the change from c5fe10f365247c3dd9416b7ec8bad73a60b5946e.
+While that commit correctly adds the bindir from devtoolset to the
+path, the driver dir / install dir still comes first. This means
+we'll still end up picking /usr/bin/ld rather than the one from
+devtoolset.
+
+Unfortunately, I don't see any way to test this. In the environment
+the tests are run, this would only result in a behavior difference
+if there is an ld binary present in the LLVM build directory, which
+isn't the case.
+
+Differential Revision: https://reviews.llvm.org/D151203
+---
+ clang/lib/Driver/ToolChains/Linux.cpp | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
+index 853ff99d9fe5..aecabb46d4b9 100644
+--- a/clang/lib/Driver/ToolChains/Linux.cpp
++++ b/clang/lib/Driver/ToolChains/Linux.cpp
+@@ -244,9 +244,9 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+     // With devtoolset on RHEL, we want to add a bin directory that is relative
+     // to the detected gcc install, because if we are using devtoolset gcc then
+     // we want to use other tools from devtoolset (e.g. ld) instead of the
+-    // standard system tools.
+-    PPaths.push_back(Twine(GCCInstallation.getParentLibPath() +
+-                     "/../bin").str());
++    // standard system tools. This should take precedence over InstalledDir.
++    PPaths.insert(PPaths.begin(),
++                  Twine(GCCInstallation.getParentLibPath() + "/../bin").str());
+ 
+   if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb)
+     ExtraOpts.push_back("-X");
+-- 
+2.40.1
+
diff --git a/18-99273.patch b/18-99273.patch
new file mode 100644
index 0000000..bacb46b
--- /dev/null
+++ b/18-99273.patch
@@ -0,0 +1,893 @@
+From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
+From: Carl Ritson <carl.ritson@amd.com>
+Date: Wed, 17 Jul 2024 15:07:42 +0900
+Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
+
+On GFX11.5 shaders having completed exports need to execute/wait
+at a lower priority than shaders still executing exports.
+Add code to maintain normal priority of 2 for shaders that export
+and drop to priority 0 after exports.
+---
+ llvm/lib/Target/AMDGPU/AMDGPU.td              |  15 +-
+ .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
+ llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |   1 +
+ llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   3 +
+ .../AMDGPU/required-export-priority.ll        | 344 ++++++++++++++++++
+ .../AMDGPU/required-export-priority.mir       | 293 +++++++++++++++
+ 6 files changed, 765 insertions(+), 3 deletions(-)
+ create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+ create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
+
+diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
+index dfc8eaea66f7b..14fcf6a210a78 100644
+--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
++++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
+@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
+   "Has restricted SOffset (immediate not supported)."
+ >;
+ 
++def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
++  "HasRequiredExportPriority",
++  "true",
++  "Export priority must be explicitly manipulated on GFX11.5"
++>;
++
+ //===------------------------------------------------------------===//
+ // Subtarget Features (options and debugging)
+ //===------------------------------------------------------------===//
+@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
+   !listconcat(FeatureISAVersion11_Common.Features,
+     [FeatureSALUFloatInsts,
+      FeatureDPPSrc1SGPR,
+-     FeatureVGPRSingleUseHintInsts])>;
++     FeatureVGPRSingleUseHintInsts,
++     FeatureRequiredExportPriority])>;
+ 
+ def FeatureISAVersion11_5_1 : FeatureSet<
+   !listconcat(FeatureISAVersion11_Common.Features,
+     [FeatureSALUFloatInsts,
+      FeatureDPPSrc1SGPR,
+      FeatureVGPRSingleUseHintInsts,
+-     FeatureGFX11FullVGPRs])>;
++     FeatureGFX11FullVGPRs,
++     FeatureRequiredExportPriority])>;
+ 
+ def FeatureISAVersion12 : FeatureSet<
+   [FeatureGFX12,
+diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+index a402fc6d7e611..a8b171aa82840 100644
+--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
++++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+@@ -14,6 +14,7 @@
+ #include "GCNSubtarget.h"
+ #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+ #include "SIMachineFunctionInfo.h"
++#include "llvm/CodeGen/MachineFrameInfo.h"
+ #include "llvm/CodeGen/MachineFunction.h"
+ #include "llvm/CodeGen/ScheduleDAG.h"
+ #include "llvm/TargetParser/TargetParser.h"
+@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
+   fixWMMAHazards(MI);
+   fixShift64HighRegBug(MI);
+   fixVALUMaskWriteHazard(MI);
++  fixRequiredExportPriority(MI);
+ }
+ 
+ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
+@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
+ 
+   return true;
+ }
++
++static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
++                               const SIInstrInfo &TII) {
++  MachineBasicBlock &EntryMBB = MF->front(); // First block of the function.
++  if (EntryMBB.begin() != EntryMBB.end()) { // Only inspect a non-empty block.
++    auto &EntryMI = *EntryMBB.begin();
++    if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
++        EntryMI.getOperand(0).getImm() >= Priority)
++      return false; // Entry already starts at >= Priority; nothing inserted.
++  }
++
++  BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
++      .addImm(Priority); // Prepend "S_SETPRIO Priority" to the entry block.
++  return true; // Reports that the function was modified.
++}
++
++bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
++  if (!ST.hasRequiredExportPriority())
++    return false; // Subtarget feature not set: workaround not needed.
++
++  // Assume the following shader types will never have exports,
++  // and avoid adding or adjusting S_SETPRIO.
++  MachineBasicBlock *MBB = MI->getParent();
++  MachineFunction *MF = MBB->getParent();
++  auto CC = MF->getFunction().getCallingConv();
++  switch (CC) {
++  case CallingConv::AMDGPU_CS:
++  case CallingConv::AMDGPU_CS_Chain:
++  case CallingConv::AMDGPU_CS_ChainPreserve:
++  case CallingConv::AMDGPU_KERNEL:
++    return false;
++  default:
++    break;
++  }
++
++  const int MaxPriority = 3; // Upper clamp when raising an S_SETPRIO.
++  const int NormalPriority = 2; // Priority kept while exports may execute.
++  const int PostExportPriority = 0; // Priority dropped to after exports.
++
++  auto It = MI->getIterator();
++  switch (MI->getOpcode()) {
++  case AMDGPU::S_ENDPGM:
++  case AMDGPU::S_ENDPGM_SAVED:
++  case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
++  case AMDGPU::SI_RETURN_TO_EPILOG:
++    // Ensure shader with calls raises priority at entry.
++    // This ensures correct priority if exports exist in callee.
++    if (MF->getFrameInfo().hasCalls())
++      return ensureEntrySetPrio(MF, NormalPriority, TII);
++    return false;
++  case AMDGPU::S_SETPRIO: {
++    // Raise low priorities, except the WA's own drop to 0 after an export.
++    auto &PrioOp = MI->getOperand(0);
++    int Prio = PrioOp.getImm();
++    bool InWA = (Prio == PostExportPriority) &&
++                (It != MBB->begin() && TII.isEXP(*std::prev(It)));
++    if (InWA || Prio >= NormalPriority)
++      return false; // Keep WA's own drop and anything already >= normal.
++    PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
++    return true; // The immediate was rewritten in place.
++  }
++  default:
++    if (!TII.isEXP(*MI))
++      return false; // Not an export: nothing to fix here.
++    break;
++  }
++
++  // Check entry priority at each export (as there will only be a few).
++  // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
++  bool Changed = false;
++  if (CC != CallingConv::AMDGPU_Gfx)
++    Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
++
++  auto NextMI = std::next(It);
++  bool EndOfShader = false; // Stays false when MI is last in its block.
++  if (NextMI != MBB->end()) {
++    // Only need WA at end of sequence of exports.
++    if (TII.isEXP(*NextMI))
++      return Changed;
++    // Assume appropriate S_SETPRIO after export means WA already applied.
++    if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
++        NextMI->getOperand(0).getImm() == PostExportPriority)
++      return Changed;
++    EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM; // S_ENDPGM only.
++  }
++
++  const DebugLoc &DL = MI->getDebugLoc(); // Reuse the export's location.
++
++  // Lower priority.
++  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
++      .addImm(PostExportPriority);
++
++  if (!EndOfShader) {
++    // Wait for exports to complete.
++    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
++        .addReg(AMDGPU::SGPR_NULL)
++        .addImm(0);
++  }
++
++  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
++  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
++
++  if (!EndOfShader) {
++    // Return to normal (higher) priority.
++    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
++        .addImm(NormalPriority);
++  }
++
++  return true; // The workaround sequence was inserted.
++}
+diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+index 3ccca527c626b..f2a64ab48e180 100644
+--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
++++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
+   bool fixWMMAHazards(MachineInstr *MI);
+   bool fixShift64HighRegBug(MachineInstr *MI);
+   bool fixVALUMaskWriteHazard(MachineInstr *MI);
++  bool fixRequiredExportPriority(MachineInstr *MI);
+ 
+   int checkMAIHazards(MachineInstr *MI);
+   int checkMAIHazards908(MachineInstr *MI);
+diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+index e5817594a4521..def89c785b855 100644
+--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
++++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
+   bool HasVOPDInsts = false;
+   bool HasVALUTransUseHazard = false;
+   bool HasForceStoreSC0SC1 = false;
++  bool HasRequiredExportPriority = false;
+ 
+   // Dummy feature to use for assembler in tablegen.
+   bool FeatureDisable = false;
+@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
+ 
+   bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
+ 
++  bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
++
+   /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
+   /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
+   bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
+diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+new file mode 100644
+index 0000000000000..377902f3f0d1a
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+@@ -0,0 +1,344 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
++; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
++
++define amdgpu_ps void @test_export_zeroes_f32() #0 {
++; GCN-LABEL: test_export_zeroes_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 0
++; GCN-NEXT:    exp mrt0 off, off, off, off
++; GCN-NEXT:    exp mrt0 off, off, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_ps void @test_export_en_src0_f32() #0 {
++; GCN-LABEL: test_export_en_src0_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 v3, off, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_gs void @test_export_gs() #0 {
++; GCN-LABEL: test_export_gs:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_hs void @test_export_hs() #0 {
++; GCN-LABEL: test_export_hs:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_gfx void @test_export_gfx(float %v) #0 {
++; GCN-LABEL: test_export_gfx:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
++; GCN-NEXT:    v_mov_b32_e32 v1, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v2, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v3, 2.0
++; GCN-NEXT:    exp mrt0 off, v3, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_waitcnt expcnt(0)
++; GCN-NEXT:    s_setpc_b64 s[30:31]
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_cs void @test_export_cs() #0 {
++; GCN-LABEL: test_export_cs:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_kernel void @test_export_kernel() #0 {
++; GCN-LABEL: test_export_kernel:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
++; GCN-LABEL: test_no_export_gfx:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
++; GCN-NEXT:    s_setpc_b64 s[30:31]
++  ret void
++}
++
++define amdgpu_ps void @test_no_export_ps(float %v) #0 {
++; GCN-LABEL: test_no_export_ps:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_endpgm
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB9_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB9_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_vm_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB10_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB10_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_done_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB11_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB11_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_vm_done_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB12_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB12_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
++; GCN-LABEL: test_export_pos_before_param_across_load:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
++; GCN-NEXT:    v_mov_b32_e32 v1, 0
++; GCN-NEXT:    v_mov_b32_e32 v2, 1.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 0.5
++; GCN-NEXT:    s_waitcnt vmcnt(0)
++; GCN-NEXT:    exp pos0 v1, v1, v1, v0 done
++; GCN-NEXT:    exp invalid_target_32 v2, v2, v2, v2
++; GCN-NEXT:    exp invalid_target_33 v2, v2, v2, v3
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
++  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
++  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
++  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
++; GCN-LABEL: test_export_across_store_load:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v2, 24
++; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
++; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
++; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 8, vcc_lo
++; GCN-NEXT:    v_mov_b32_e32 v2, 0
++; GCN-NEXT:    scratch_store_b32 v0, v1, off
++; GCN-NEXT:    scratch_load_b32 v0, off, off
++; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
++; GCN-NEXT:    exp pos0 v2, v2, v2, v1 done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_waitcnt vmcnt(0)
++; GCN-NEXT:    exp invalid_target_32 v0, v2, v1, v2
++; GCN-NEXT:    exp invalid_target_33 v0, v2, v1, v2
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  %data0 = alloca <4 x float>, align 8, addrspace(5)
++  %data1 = alloca <4 x float>, align 8, addrspace(5)
++  %cmp = icmp eq i32 %idx, 1
++  %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
++  store float %v, ptr addrspace(5) %data, align 8
++  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
++  %load0 = load float, ptr addrspace(5) %data0, align 8
++  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
++  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
++  ret void
++}
++
++define amdgpu_ps void @test_export_in_callee(float %v) #0 {
++; GCN-LABEL: test_export_in_callee:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_getpc_b64 s[0:1]
++; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
++; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
++; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
++; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
++; GCN-NEXT:    s_mov_b32 s32, 0
++; GCN-NEXT:    s_waitcnt lgkmcnt(0)
++; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
++; GCN-NEXT:    s_endpgm
++  %x = fadd float %v, 1.0
++  call void @test_export_gfx(float %x)
++  ret void
++}
++
++define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
++; GCN-LABEL: test_export_in_callee_prio:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s32, 0
++; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_getpc_b64 s[0:1]
++; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
++; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
++; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
++; GCN-NEXT:    s_waitcnt lgkmcnt(0)
++; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
++; GCN-NEXT:    s_endpgm
++  %x = fadd float %v, 1.0
++  call void @llvm.amdgcn.s.setprio(i16 0)
++  call void @test_export_gfx(float %x)
++  ret void
++}
++
++declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
++declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
++declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
++declare void @llvm.amdgcn.s.setprio(i16)
++
++attributes #0 = { nounwind }
++attributes #1 = { nounwind inaccessiblememonly }
++attributes #2 = { nounwind readnone }
+diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
+new file mode 100644
+index 0000000000000..eee04468036e5
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
+@@ -0,0 +1,293 @@
++# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
++# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs  %s -o - | FileCheck -check-prefixes=GFX1150 %s
++
++--- |
++  define amdgpu_ps void @end_of_shader() {
++    ret void
++  }
++  define amdgpu_ps void @end_of_shader_return_to_epilogue() {
++    ret void
++  }
++  define amdgpu_ps void @end_of_block() {
++    ret void
++  }
++  define amdgpu_ps void @start_of_block() {
++    ret void
++  }
++  define amdgpu_ps void @block_of_exports() {
++    ret void
++  }
++  define amdgpu_ps void @sparse_exports() {
++    ret void
++  }
++  define amdgpu_ps void @existing_setprio_1() {
++    ret void
++  }
++  define amdgpu_ps void @existing_setprio_2() {
++    ret void
++  }
++...
++
++---
++name: end_of_shader
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: end_of_shader
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_ENDPGM 0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: end_of_shader_return_to_epilogue
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    SI_RETURN_TO_EPILOG $vgpr0
++...
++
++---
++name: end_of_block
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  ; GFX1150-LABEL: name: end_of_block
++  ; GFX1150: bb.0:
++  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 0
++  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.1:
++  ; GFX1150-NEXT:   S_ENDPGM 0
++  bb.0:
++    liveins: $vgpr0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++
++  bb.1:
++    S_ENDPGM 0
++...
++
++---
++name: start_of_block
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  ; GFX1150-LABEL: name: start_of_block
++  ; GFX1150: bb.0:
++  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.1:
++  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 0
++  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.2:
++  ; GFX1150-NEXT:   S_ENDPGM 0
++  bb.0:
++    liveins: $vgpr0
++
++  bb.1:
++    liveins: $vgpr0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++
++  bb.2:
++    S_ENDPGM 0
++...
++
++---
++name: block_of_exports
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: block_of_exports
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_ENDPGM 0
++    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: sparse_exports
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: sparse_exports
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_ENDPGM 0
++    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: existing_setprio_1
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  ; GFX1150-LABEL: name: existing_setprio_1
++  ; GFX1150: bb.0:
++  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT:   $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.1:
++  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 3
++  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.2:
++  ; GFX1150-NEXT:   successors: %bb.3(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 3
++  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.3:
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_ENDPGM 0
++  bb.0:
++    liveins: $vgpr0
++    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++
++  bb.1:
++    liveins: $vgpr0
++    S_SETPRIO 3
++    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++    S_SETPRIO 0
++
++  bb.2:
++    liveins: $vgpr0
++    S_SETPRIO 1
++    $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
++    S_SETPRIO 0
++
++  bb.3:
++    liveins: $vgpr0
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: existing_setprio_2
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: existing_setprio_2
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 3
++    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: S_SETPRIO 3
++    ; GFX1150-NEXT: S_ENDPGM 0
++    S_SETPRIO 3
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_SETPRIO 3
++    S_ENDPGM 0
++...
+
+From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
+From: Carl Ritson <carl.ritson@amd.com>
+Date: Wed, 17 Jul 2024 16:18:02 +0900
+Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
+
+---
+ llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+index 377902f3f0d1a..ebc209bd4d451 100644
+--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+@@ -1,5 +1,5 @@
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
++; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
+ 
+ define amdgpu_ps void @test_export_zeroes_f32() #0 {
+ ; GCN-LABEL: test_export_zeroes_f32:
diff --git a/llvm.spec b/llvm.spec
index b212d21..eb86fdd 100644
--- a/llvm.spec
+++ b/llvm.spec
@@ -56,6 +56,11 @@
 # See https://docs.fedoraproject.org/en-US/packaging-guidelines/#_compiler_macros
 %global toolchain clang
 
+
+%if %{defined rhel} && 0%{?rhel} < 10
+%global gts_version 14
+%endif
+
 # Opt out of https://fedoraproject.org/wiki/Changes/fno-omit-frame-pointer
 # https://bugzilla.redhat.com/show_bug.cgi?id=2158587
 %undefine _include_frame_pointers
@@ -245,6 +250,10 @@ Patch102: 0003-PATCH-clang-Don-t-install-static-libraries.patch
 # More info is available here: https://reviews.llvm.org/D159115#4641826
 Patch103: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
 
+# With the introduction of --gcc-install-dir in the clang config file,
+# this might no longer be needed.
+Patch104: 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
+
 #region LLD patches
 Patch1800: 0001-18-Always-build-shared-libs-for-LLD.patch
 Patch1902: 0001-19-Always-build-shared-libs-for-LLD.patch
@@ -263,11 +272,20 @@ Patch500: 0001-19-Remove-myst_parser-dependency-for-RHEL.patch
 Patch501: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
 #endregion RHEL patches
 
+# Backport with modifications from
+# https://github.com/llvm/llvm-project/pull/99273
+# Fixes RHEL-49517.
+Patch1801: 18-99273.patch
+
 %if 0%{?rhel} == 8
 %global python3_pkgversion 3.12
 %global __python3 /usr/bin/python3.12
 %endif
 
+%if %{defined gts_version}
+# Required for 64-bit atomics on i686.
+BuildRequires: gcc-toolset-%{gts_version}-libatomic-devel
+%endif
 BuildRequires:	gcc
 BuildRequires:	gcc-c++
 BuildRequires:	clang
@@ -510,6 +528,9 @@ libomp-devel to enable -fopenmp.
 %package -n %{pkg_name_clang}-libs
 Summary: Runtime library for clang
 Requires: %{pkg_name_clang}-resource-filesystem%{?_isa} = %{version}-%{release}
+%if %{defined gts_version}
+Requires: gcc-toolset-%{gts_version}-gcc-c++
+%endif
 Recommends: %{pkg_name_compiler_rt}%{?_isa} = %{version}-%{release}
 Requires: %{pkg_name_llvm}-libs = %{version}-%{release}
 # atomic support is not part of compiler-rt
@@ -836,6 +857,7 @@ echo "" > lldb/docs/CMakeLists.txt
 %endif
 
 %if %reduce_debuginfo == 1
+# Decrease debuginfo verbosity to reduce memory consumption during final library linking
 %global optflags %(echo %{optflags} | sed 's/-g /-g1 /')
 %endif
 
@@ -1062,7 +1084,7 @@ fi
 
 %cmake_build
 
-# If we don't build the runtimes target here, we'll have to wait for the %check
+# If we don't build the runtimes target here, we'll have to wait for the %%check
 # section until these files are available but they need to be installed.
 #
 #   /usr/lib64/libomptarget.devicertl.a
@@ -1261,11 +1283,22 @@ echo "%%clang%{maj_ver}_resource_dir %%{_prefix}/lib/clang/%{maj_ver}" >> %{buil
 
 # Install config file for clang
 %if %{maj_ver} >=18
-mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/
-echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
-echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
+%global cfg_file_content --gcc-triple=%{_target_cpu}-redhat-linux
+
+%if %{defined rhel} && 0%{?rhel} < 10
+%global cfg_file_content %{cfg_file_content} -gdwarf-4 -g0
 %endif
 
+%if %{defined gts_version}
+%global cfg_file_content %{cfg_file_content} --gcc-install-dir=/opt/rh/gcc-toolset-%{gts_version}/root/usr
+%endif
+
+mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/
+echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
+echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
+%endif
+
+
 #endregion CLANG installation
 
 #region COMPILER-RT installation
@@ -2004,14 +2037,14 @@ fi
 
 %files -n %{pkg_name_llvm}-libs
 %license llvm/LICENSE.TXT
-%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}}.so
+%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix}.so
 %if %{with gold}
 %{install_libdir}/LLVMgold.so
 %if %{without compat_build}
 %{_libdir}/bfd-plugins/LLVMgold.so
 %endif
 %endif
-%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}}
+%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix}
 %{install_libdir}/libLTO.so*
 %{install_libdir}/libRemarks.so*
 %if %{with compat_build}