diff --git a/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
new file mode 100644
index 0000000..92ab81c
--- /dev/null
+++ b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
@@ -0,0 +1,41 @@
+From 73d3b4047d757ef35850e2cef38285b96be82f0f Mon Sep 17 00:00:00 2001
+From: Nikita Popov <npopov@redhat.com>
+Date: Tue, 23 May 2023 12:17:29 +0200
+Subject: [PATCH] [Driver] Give devtoolset path precedence over InstalledDir
+
+This is a followup to the change from c5fe10f365247c3dd9416b7ec8bad73a60b5946e.
+While that commit correctly adds the bindir from devtoolset to the
+path, the driver dir / install dir still comes first. This means
+we'll still end up picking /usr/bin/ld rather than the one from
+devtoolset.
+
+Unfortunately, I don't see any way to test this. In the environment
+where the tests are run, this would only result in a behavior difference
+if there is an ld binary present in the LLVM build directory, which
+isn't the case.
+
+Differential Revision: https://reviews.llvm.org/D151203
+---
+ clang/lib/Driver/ToolChains/Linux.cpp | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
+index 853ff99d9fe5..aecabb46d4b9 100644
+--- a/clang/lib/Driver/ToolChains/Linux.cpp
++++ b/clang/lib/Driver/ToolChains/Linux.cpp
+@@ -244,9 +244,9 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+     // With devtoolset on RHEL, we want to add a bin directory that is relative
+     // to the detected gcc install, because if we are using devtoolset gcc then
+     // we want to use other tools from devtoolset (e.g. ld) instead of the
+-    // standard system tools.
+-    PPaths.push_back(Twine(GCCInstallation.getParentLibPath() +
+-                     "/../bin").str());
++    // standard system tools. This should take precedence over InstalledDir.
++    PPaths.insert(PPaths.begin(),
++                  Twine(GCCInstallation.getParentLibPath() + "/../bin").str());
+ 
+   if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb)
+     ExtraOpts.push_back("-X");
+-- 
+2.40.1
+
diff --git a/18-99273.patch b/18-99273.patch
new file mode 100644
index 0000000..bacb46b
--- /dev/null
+++ b/18-99273.patch
@@ -0,0 +1,893 @@
+From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
+From: Carl Ritson <carl.ritson@amd.com>
+Date: Wed, 17 Jul 2024 15:07:42 +0900
+Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
+
+On GFX11.5 shaders having completed exports need to execute/wait
+at a lower priority than shaders still executing exports.
+Add code to maintain normal priority of 2 for shaders that export
+and drop to priority 0 after exports.
+---
+ llvm/lib/Target/AMDGPU/AMDGPU.td              |  15 +-
+ .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
+ llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |   1 +
+ llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   3 +
+ .../AMDGPU/required-export-priority.ll        | 344 ++++++++++++++++++
+ .../AMDGPU/required-export-priority.mir       | 293 +++++++++++++++
+ 6 files changed, 765 insertions(+), 3 deletions(-)
+ create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+ create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
+
+diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
+index dfc8eaea66f7b..14fcf6a210a78 100644
+--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
++++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
+@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
+   "Has restricted SOffset (immediate not supported)."
+ >;
+ 
++def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
++  "HasRequiredExportPriority",
++  "true",
++  "Export priority must be explicitly manipulated on GFX11.5"
++>;
++
+ //===------------------------------------------------------------===//
+ // Subtarget Features (options and debugging)
+ //===------------------------------------------------------------===//
+@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
+   !listconcat(FeatureISAVersion11_Common.Features,
+     [FeatureSALUFloatInsts,
+      FeatureDPPSrc1SGPR,
+-     FeatureVGPRSingleUseHintInsts])>;
++     FeatureVGPRSingleUseHintInsts,
++     FeatureRequiredExportPriority])>;
+ 
+ def FeatureISAVersion11_5_1 : FeatureSet<
+   !listconcat(FeatureISAVersion11_Common.Features,
+     [FeatureSALUFloatInsts,
+      FeatureDPPSrc1SGPR,
+      FeatureVGPRSingleUseHintInsts,
+-     FeatureGFX11FullVGPRs])>;
++     FeatureGFX11FullVGPRs,
++     FeatureRequiredExportPriority])>;
+ 
+ def FeatureISAVersion12 : FeatureSet<
+   [FeatureGFX12,
+diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+index a402fc6d7e611..a8b171aa82840 100644
+--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
++++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+@@ -14,6 +14,7 @@
+ #include "GCNSubtarget.h"
+ #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+ #include "SIMachineFunctionInfo.h"
++#include "llvm/CodeGen/MachineFrameInfo.h"
+ #include "llvm/CodeGen/MachineFunction.h"
+ #include "llvm/CodeGen/ScheduleDAG.h"
+ #include "llvm/TargetParser/TargetParser.h"
+@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
+   fixWMMAHazards(MI);
+   fixShift64HighRegBug(MI);
+   fixVALUMaskWriteHazard(MI);
++  fixRequiredExportPriority(MI);
+ }
+ 
+ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
+@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
+ 
+   return true;
+ }
++
++static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
++                               const SIInstrInfo &TII) {
++  MachineBasicBlock &EntryMBB = MF->front();
++  if (EntryMBB.begin() != EntryMBB.end()) {
++    auto &EntryMI = *EntryMBB.begin();
++    if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
++        EntryMI.getOperand(0).getImm() >= Priority)
++      return false;
++  }
++
++  BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
++      .addImm(Priority);
++  return true;
++}
++
++bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
++  if (!ST.hasRequiredExportPriority())
++    return false;
++
++  // Assume the following shader types will never have exports,
++  // and avoid adding or adjusting S_SETPRIO.
++  MachineBasicBlock *MBB = MI->getParent();
++  MachineFunction *MF = MBB->getParent();
++  auto CC = MF->getFunction().getCallingConv();
++  switch (CC) {
++  case CallingConv::AMDGPU_CS:
++  case CallingConv::AMDGPU_CS_Chain:
++  case CallingConv::AMDGPU_CS_ChainPreserve:
++  case CallingConv::AMDGPU_KERNEL:
++    return false;
++  default:
++    break;
++  }
++
++  const int MaxPriority = 3;
++  const int NormalPriority = 2;
++  const int PostExportPriority = 0;
++
++  auto It = MI->getIterator();
++  switch (MI->getOpcode()) {
++  case AMDGPU::S_ENDPGM:
++  case AMDGPU::S_ENDPGM_SAVED:
++  case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
++  case AMDGPU::SI_RETURN_TO_EPILOG:
++    // Ensure shader with calls raises priority at entry.
++    // This ensures correct priority if exports exist in callee.
++    if (MF->getFrameInfo().hasCalls())
++      return ensureEntrySetPrio(MF, NormalPriority, TII);
++    return false;
++  case AMDGPU::S_SETPRIO: {
++    // Raise minimum priority unless in workaround.
++    auto &PrioOp = MI->getOperand(0);
++    int Prio = PrioOp.getImm();
++    bool InWA = (Prio == PostExportPriority) &&
++                (It != MBB->begin() && TII.isEXP(*std::prev(It)));
++    if (InWA || Prio >= NormalPriority)
++      return false;
++    PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
++    return true;
++  }
++  default:
++    if (!TII.isEXP(*MI))
++      return false;
++    break;
++  }
++
++  // Check entry priority at each export (as there will only be a few).
++  // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
++  bool Changed = false;
++  if (CC != CallingConv::AMDGPU_Gfx)
++    Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
++
++  auto NextMI = std::next(It);
++  bool EndOfShader = false;
++  if (NextMI != MBB->end()) {
++    // Only need WA at end of sequence of exports.
++    if (TII.isEXP(*NextMI))
++      return Changed;
++    // Assume appropriate S_SETPRIO after export means WA already applied.
++    if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
++        NextMI->getOperand(0).getImm() == PostExportPriority)
++      return Changed;
++    EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
++  }
++
++  const DebugLoc &DL = MI->getDebugLoc();
++
++  // Lower priority.
++  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
++      .addImm(PostExportPriority);
++
++  if (!EndOfShader) {
++    // Wait for exports to complete.
++    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
++        .addReg(AMDGPU::SGPR_NULL)
++        .addImm(0);
++  }
++
++  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
++  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
++
++  if (!EndOfShader) {
++    // Return to normal (higher) priority.
++    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
++        .addImm(NormalPriority);
++  }
++
++  return true;
++}
+diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+index 3ccca527c626b..f2a64ab48e180 100644
+--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
++++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
+   bool fixWMMAHazards(MachineInstr *MI);
+   bool fixShift64HighRegBug(MachineInstr *MI);
+   bool fixVALUMaskWriteHazard(MachineInstr *MI);
++  bool fixRequiredExportPriority(MachineInstr *MI);
+ 
+   int checkMAIHazards(MachineInstr *MI);
+   int checkMAIHazards908(MachineInstr *MI);
+diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+index e5817594a4521..def89c785b855 100644
+--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
++++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
+   bool HasVOPDInsts = false;
+   bool HasVALUTransUseHazard = false;
+   bool HasForceStoreSC0SC1 = false;
++  bool HasRequiredExportPriority = false;
+ 
+   // Dummy feature to use for assembler in tablegen.
+   bool FeatureDisable = false;
+@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
+ 
+   bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
+ 
++  bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
++
+   /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
+   /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
+   bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
+diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+new file mode 100644
+index 0000000000000..377902f3f0d1a
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+@@ -0,0 +1,344 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
++; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
++
++define amdgpu_ps void @test_export_zeroes_f32() #0 {
++; GCN-LABEL: test_export_zeroes_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 0
++; GCN-NEXT:    exp mrt0 off, off, off, off
++; GCN-NEXT:    exp mrt0 off, off, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_ps void @test_export_en_src0_f32() #0 {
++; GCN-LABEL: test_export_en_src0_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 v3, off, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_gs void @test_export_gs() #0 {
++; GCN-LABEL: test_export_gs:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_hs void @test_export_hs() #0 {
++; GCN-LABEL: test_export_hs:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_gfx void @test_export_gfx(float %v) #0 {
++; GCN-LABEL: test_export_gfx:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
++; GCN-NEXT:    v_mov_b32_e32 v1, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v2, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v3, 2.0
++; GCN-NEXT:    exp mrt0 off, v3, off, off done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_waitcnt expcnt(0)
++; GCN-NEXT:    s_setpc_b64 s[30:31]
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_cs void @test_export_cs() #0 {
++; GCN-LABEL: test_export_cs:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_kernel void @test_export_kernel() #0 {
++; GCN-LABEL: test_export_kernel:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
++; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
++; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
++; GCN-NEXT:    exp mrt0 off, v2, off, off done
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
++; GCN-LABEL: test_no_export_gfx:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
++; GCN-NEXT:    s_setpc_b64 s[30:31]
++  ret void
++}
++
++define amdgpu_ps void @test_no_export_ps(float %v) #0 {
++; GCN-LABEL: test_no_export_ps:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_endpgm
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB9_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB9_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_vm_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB10_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB10_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_done_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB11_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB11_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
++; GCN-LABEL: test_if_export_vm_done_f32:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s0, exec_lo
++; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
++; GCN-NEXT:    s_cbranch_execz .LBB12_2
++; GCN-NEXT:  ; %bb.1: ; %exp
++; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:  .LBB12_2: ; %end
++; GCN-NEXT:    s_endpgm
++  %cc = icmp eq i32 %flag, 0
++  br i1 %cc, label %end, label %exp
++
++exp:
++  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
++  br label %end
++
++end:
++  ret void
++}
++
++define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
++; GCN-LABEL: test_export_pos_before_param_across_load:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
++; GCN-NEXT:    v_mov_b32_e32 v1, 0
++; GCN-NEXT:    v_mov_b32_e32 v2, 1.0
++; GCN-NEXT:    v_mov_b32_e32 v3, 0.5
++; GCN-NEXT:    s_waitcnt vmcnt(0)
++; GCN-NEXT:    exp pos0 v1, v1, v1, v0 done
++; GCN-NEXT:    exp invalid_target_32 v2, v2, v2, v2
++; GCN-NEXT:    exp invalid_target_33 v2, v2, v2, v3
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
++  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
++  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
++  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
++  ret void
++}
++
++define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
++; GCN-LABEL: test_export_across_store_load:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    v_mov_b32_e32 v2, 24
++; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
++; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
++; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 8, vcc_lo
++; GCN-NEXT:    v_mov_b32_e32 v2, 0
++; GCN-NEXT:    scratch_store_b32 v0, v1, off
++; GCN-NEXT:    scratch_load_b32 v0, off, off
++; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
++; GCN-NEXT:    exp pos0 v2, v2, v2, v1 done
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_waitcnt vmcnt(0)
++; GCN-NEXT:    exp invalid_target_32 v0, v2, v1, v2
++; GCN-NEXT:    exp invalid_target_33 v0, v2, v1, v2
++; GCN-NEXT:    s_setprio 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_nop 0
++; GCN-NEXT:    s_endpgm
++  %data0 = alloca <4 x float>, align 8, addrspace(5)
++  %data1 = alloca <4 x float>, align 8, addrspace(5)
++  %cmp = icmp eq i32 %idx, 1
++  %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
++  store float %v, ptr addrspace(5) %data, align 8
++  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
++  %load0 = load float, ptr addrspace(5) %data0, align 8
++  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
++  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
++  ret void
++}
++
++define amdgpu_ps void @test_export_in_callee(float %v) #0 {
++; GCN-LABEL: test_export_in_callee:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_getpc_b64 s[0:1]
++; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
++; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
++; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
++; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
++; GCN-NEXT:    s_mov_b32 s32, 0
++; GCN-NEXT:    s_waitcnt lgkmcnt(0)
++; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
++; GCN-NEXT:    s_endpgm
++  %x = fadd float %v, 1.0
++  call void @test_export_gfx(float %x)
++  ret void
++}
++
++define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
++; GCN-LABEL: test_export_in_callee_prio:
++; GCN:       ; %bb.0:
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_mov_b32 s32, 0
++; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
++; GCN-NEXT:    s_setprio 2
++; GCN-NEXT:    s_getpc_b64 s[0:1]
++; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
++; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
++; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
++; GCN-NEXT:    s_waitcnt lgkmcnt(0)
++; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
++; GCN-NEXT:    s_endpgm
++  %x = fadd float %v, 1.0
++  call void @llvm.amdgcn.s.setprio(i16 0)
++  call void @test_export_gfx(float %x)
++  ret void
++}
++
++declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
++declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
++declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
++declare void @llvm.amdgcn.s.setprio(i16)
++
++attributes #0 = { nounwind }
++attributes #1 = { nounwind inaccessiblememonly }
++attributes #2 = { nounwind readnone }
+diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
+new file mode 100644
+index 0000000000000..eee04468036e5
+--- /dev/null
++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
+@@ -0,0 +1,293 @@
++# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
++# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs  %s -o - | FileCheck -check-prefixes=GFX1150 %s
++
++--- |
++  define amdgpu_ps void @end_of_shader() {
++    ret void
++  }
++  define amdgpu_ps void @end_of_shader_return_to_epilogue() {
++    ret void
++  }
++  define amdgpu_ps void @end_of_block() {
++    ret void
++  }
++  define amdgpu_ps void @start_of_block() {
++    ret void
++  }
++  define amdgpu_ps void @block_of_exports() {
++    ret void
++  }
++  define amdgpu_ps void @sparse_exports() {
++    ret void
++  }
++  define amdgpu_ps void @existing_setprio_1() {
++    ret void
++  }
++  define amdgpu_ps void @existing_setprio_2() {
++    ret void
++  }
++...
++
++---
++name: end_of_shader
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: end_of_shader
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_ENDPGM 0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: end_of_shader_return_to_epilogue
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    SI_RETURN_TO_EPILOG $vgpr0
++...
++
++---
++name: end_of_block
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  ; GFX1150-LABEL: name: end_of_block
++  ; GFX1150: bb.0:
++  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 0
++  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.1:
++  ; GFX1150-NEXT:   S_ENDPGM 0
++  bb.0:
++    liveins: $vgpr0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++
++  bb.1:
++    S_ENDPGM 0
++...
++
++---
++name: start_of_block
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  ; GFX1150-LABEL: name: start_of_block
++  ; GFX1150: bb.0:
++  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.1:
++  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 0
++  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.2:
++  ; GFX1150-NEXT:   S_ENDPGM 0
++  bb.0:
++    liveins: $vgpr0
++
++  bb.1:
++    liveins: $vgpr0
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++
++  bb.2:
++    S_ENDPGM 0
++...
++
++---
++name: block_of_exports
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: block_of_exports
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_ENDPGM 0
++    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: sparse_exports
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: sparse_exports
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_ENDPGM 0
++    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: existing_setprio_1
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  ; GFX1150-LABEL: name: existing_setprio_1
++  ; GFX1150: bb.0:
++  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT:   $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.1:
++  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 3
++  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.2:
++  ; GFX1150-NEXT:   successors: %bb.3(0x80000000)
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   S_SETPRIO 3
++  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 2
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT: bb.3:
++  ; GFX1150-NEXT:   liveins: $vgpr0
++  ; GFX1150-NEXT: {{  $}}
++  ; GFX1150-NEXT:   EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++  ; GFX1150-NEXT:   S_SETPRIO 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_NOP 0
++  ; GFX1150-NEXT:   S_ENDPGM 0
++  bb.0:
++    liveins: $vgpr0
++    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
++
++  bb.1:
++    liveins: $vgpr0
++    S_SETPRIO 3
++    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
++    S_SETPRIO 0
++
++  bb.2:
++    liveins: $vgpr0
++    S_SETPRIO 1
++    $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
++    S_SETPRIO 0
++
++  bb.3:
++    liveins: $vgpr0
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_ENDPGM 0
++...
++
++---
++name: existing_setprio_2
++tracksRegLiveness: true
++liveins:
++  - { reg: '$vgpr0' }
++body: |
++  bb.0:
++    liveins: $vgpr0
++    ; GFX1150-LABEL: name: existing_setprio_2
++    ; GFX1150: liveins: $vgpr0
++    ; GFX1150-NEXT: {{  $}}
++    ; GFX1150-NEXT: S_SETPRIO 3
++    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    ; GFX1150-NEXT: S_SETPRIO 0
++    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_NOP 0
++    ; GFX1150-NEXT: S_SETPRIO 2
++    ; GFX1150-NEXT: S_SETPRIO 3
++    ; GFX1150-NEXT: S_ENDPGM 0
++    S_SETPRIO 3
++    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
++    S_SETPRIO 3
++    S_ENDPGM 0
++...
+
+From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
+From: Carl Ritson <carl.ritson@amd.com>
+Date: Wed, 17 Jul 2024 16:18:02 +0900
+Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
+
+---
+ llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+index 377902f3f0d1a..ebc209bd4d451 100644
+--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+@@ -1,5 +1,5 @@
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
++; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
+ 
+ define amdgpu_ps void @test_export_zeroes_f32() #0 {
+ ; GCN-LABEL: test_export_zeroes_f32:
diff --git a/llvm.spec b/llvm.spec
index b212d21..eb86fdd 100644
--- a/llvm.spec
+++ b/llvm.spec
@@ -56,6 +56,11 @@
 # See https://docs.fedoraproject.org/en-US/packaging-guidelines/#_compiler_macros
 %global toolchain clang
 
+
+%if %{defined rhel} && 0%{?rhel} < 10
+%global gts_version 14
+%endif
+
 # Opt out of https://fedoraproject.org/wiki/Changes/fno-omit-frame-pointer
 # https://bugzilla.redhat.com/show_bug.cgi?id=2158587
 %undefine _include_frame_pointers
@@ -245,6 +250,10 @@ Patch102: 0003-PATCH-clang-Don-t-install-static-libraries.patch
 # More info is available here: https://reviews.llvm.org/D159115#4641826
 Patch103: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
 
+# With the introduction of --gcc-install-dir in the clang config file,
+# this might no longer be needed.
+Patch104: 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
+
 #region LLD patches
 Patch1800: 0001-18-Always-build-shared-libs-for-LLD.patch
 Patch1902: 0001-19-Always-build-shared-libs-for-LLD.patch
@@ -263,11 +272,20 @@ Patch500: 0001-19-Remove-myst_parser-dependency-for-RHEL.patch
 Patch501: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
 #endregion RHEL patches
 
+# Backport with modifications from
+# https://github.com/llvm/llvm-project/pull/99273
+# Fixes RHEL-49517.
+Patch1801: 18-99273.patch
+
 %if 0%{?rhel} == 8
 %global python3_pkgversion 3.12
 %global __python3 /usr/bin/python3.12
 %endif
 
+%if %{defined gts_version}
+# Required for 64-bit atomics on i686.
+BuildRequires: gcc-toolset-%{gts_version}-libatomic-devel
+%endif
 BuildRequires:	gcc
 BuildRequires:	gcc-c++
 BuildRequires:	clang
@@ -510,6 +528,9 @@ libomp-devel to enable -fopenmp.
 %package -n %{pkg_name_clang}-libs
 Summary: Runtime library for clang
 Requires: %{pkg_name_clang}-resource-filesystem%{?_isa} = %{version}-%{release}
+%if %{defined gts_version}
+Requires: gcc-toolset-%{gts_version}-gcc-c++
+%endif
 Recommends: %{pkg_name_compiler_rt}%{?_isa} = %{version}-%{release}
 Requires: %{pkg_name_llvm}-libs = %{version}-%{release}
 # atomic support is not part of compiler-rt
@@ -836,6 +857,7 @@ echo "" > lldb/docs/CMakeLists.txt
 %endif
 
 %if %reduce_debuginfo == 1
+# Decrease debuginfo verbosity to reduce memory consumption during final library linking
 %global optflags %(echo %{optflags} | sed 's/-g /-g1 /')
 %endif
 
@@ -1062,7 +1084,7 @@ fi
 
 %cmake_build
 
-# If we don't build the runtimes target here, we'll have to wait for the %check
+# If we don't build the runtimes target here, we'll have to wait for the %%check
 # section until these files are available but they need to be installed.
 #
 #   /usr/lib64/libomptarget.devicertl.a
@@ -1261,11 +1283,22 @@ echo "%%clang%{maj_ver}_resource_dir %%{_prefix}/lib/clang/%{maj_ver}" >> %{buil
 
 # Install config file for clang
 %if %{maj_ver} >=18
-mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/
-echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
-echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
+%global cfg_file_content --gcc-triple=%{_target_cpu}-redhat-linux
+
+%if %{defined rhel} && 0%{?rhel} < 10
+%global cfg_file_content %{cfg_file_content} -gdwarf-4 -g0
 %endif
 
+%if %{defined gts_version}
+%global cfg_file_content %{cfg_file_content} --gcc-install-dir=/opt/rh/gcc-toolset-%{gts_version}/root/usr
+%endif
+
+mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/
+echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
+echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
+%endif
+
+
 #endregion CLANG installation
 
 #region COMPILER-RT installation
@@ -2004,14 +2037,14 @@ fi
 
 %files -n %{pkg_name_llvm}-libs
 %license llvm/LICENSE.TXT
-%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}}.so
+%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix}.so
 %if %{with gold}
 %{install_libdir}/LLVMgold.so
 %if %{without compat_build}
 %{_libdir}/bfd-plugins/LLVMgold.so
 %endif
 %endif
-%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}}
+%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix}
 %{install_libdir}/libLTO.so*
 %{install_libdir}/libRemarks.so*
 %if %{with compat_build}