diff --git a/0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch b/0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch deleted file mode 100644 index 1f6816c..0000000 --- a/0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 70259f75a5546d331b0d687227341f653a4bf544 Mon Sep 17 00:00:00 2001 -From: Dave Airlie -Date: Thu, 25 Mar 2021 08:34:28 +1000 -Subject: [PATCH] drisw: move zink down the list below the sw drivers. - -We don't ever want drisw path picking zink as the driver, -we can revisit this when the penny wrapper work gets further -along. - -This selection causes systems with nvidia/intel dual-gpus -to try and pick the intel gpu for rendering in the nvidia -context if there is no nvidia GL driver or accel doesn't work. - -This is a partial revert of the original commit. - -Fixes: 4a3b42a717ce ("drisw: Prefer hardware-layered sw-winsys drivers over pure sw") ---- - src/gallium/auxiliary/target-helpers/inline_sw_helper.h | 6 +++--- - src/gallium/auxiliary/target-helpers/sw_helper.h | 6 +++--- - 2 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h -index c494840c44e..76eda8467b8 100644 ---- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h -+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h -@@ -81,9 +81,6 @@ sw_screen_create(struct sw_winsys *winsys) - UNUSED bool only_sw = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false); - const char *drivers[] = { - debug_get_option("GALLIUM_DRIVER", ""), --#if defined(GALLIUM_ZINK) -- only_sw ? "" : "zink", --#endif - #if defined(GALLIUM_D3D12) - only_sw ? "" : "d3d12", - #endif -@@ -95,6 +92,9 @@ sw_screen_create(struct sw_winsys *winsys) - #endif - #if defined(GALLIUM_SWR) - "swr", -+#endif -+#if defined(GALLIUM_ZINK) -+ only_sw ? "" : "zink", - #endif - }; - -diff --git a/src/gallium/auxiliary/target-helpers/sw_helper.h b/src/gallium/auxiliary/target-helpers/sw_helper.h -index d9469d9f5e3..88a5086d261 100644 ---- a/src/gallium/auxiliary/target-helpers/sw_helper.h -+++ b/src/gallium/auxiliary/target-helpers/sw_helper.h -@@ -86,9 +86,6 @@ sw_screen_create(struct sw_winsys *winsys) - UNUSED bool only_sw = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false); - const char *drivers[] = { - debug_get_option("GALLIUM_DRIVER", ""), --#if defined(GALLIUM_ZINK) -- only_sw ? "" : "zink", --#endif - #if defined(GALLIUM_D3D12) - only_sw ? "" : "d3d12", - #endif -@@ -100,6 +97,9 @@ sw_screen_create(struct sw_winsys *winsys) - #endif - #if defined(GALLIUM_SWR) - "swr", -+#endif -+#if defined(GALLIUM_ZINK) -+ only_sw ? "" : "zink", - #endif - }; - --- -2.29.2 - diff --git a/cpu_caps_fixes.patch b/cpu_caps_fixes.patch deleted file mode 100644 index 31d5283..0000000 --- a/cpu_caps_fixes.patch +++ /dev/null @@ -1,1506 +0,0 @@ -diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c -index d5c7891de62..b02ea8879e6 100644 ---- a/src/amd/common/ac_gpu_info.c -+++ b/src/amd/common/ac_gpu_info.c -@@ -510,8 +510,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, - util_cpu_detect(); - info->smart_access_memory = info->all_vram_visible && - info->chip_class >= GFX10_3 && -- util_cpu_caps.family >= CPU_AMD_ZEN3 && -- util_cpu_caps.family < CPU_AMD_LAST; -+ util_get_cpu_caps()->family >= CPU_AMD_ZEN3 && -+ util_get_cpu_caps()->family < CPU_AMD_LAST; - - /* Set chip identification. */ - info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */ -diff --git a/src/amd/compiler/tests/main.cpp b/src/amd/compiler/tests/main.cpp -index 8f5e8ea914b..e0abf63b525 100644 ---- a/src/amd/compiler/tests/main.cpp -+++ b/src/amd/compiler/tests/main.cpp -@@ -34,6 +34,8 @@ - #include "aco_ir.h" - #include "framework.h" - -+#include "util/u_cpu_detect.h" -+ - static const char *help_message = - "Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n" - "\n" -@@ -241,6 +243,8 @@ int main(int argc, char **argv) - return 99; - } - -+ util_cpu_detect(); -+ - if (do_list) { - for (auto test : tests) - printf("%s\n", test.first.c_str()); -diff --git a/src/compiler/glsl/standalone.cpp b/src/compiler/glsl/standalone.cpp -index b34583e54bd..ad1da65bcef 100644 ---- a/src/compiler/glsl/standalone.cpp -+++ b/src/compiler/glsl/standalone.cpp -@@ -398,6 +398,8 @@ standalone_compile_shader(const struct standalone_options *_options, - int status = EXIT_SUCCESS; - bool glsl_es = false; - -+ util_cpu_detect(); -+ - options = _options; - - switch (options->glsl_version) { -diff --git a/src/compiler/nir/tests/negative_equal_tests.cpp b/src/compiler/nir/tests/negative_equal_tests.cpp -index ff9eeb27f40..c7cf53543bb 100644 ---- a/src/compiler/nir/tests/negative_equal_tests.cpp -+++ b/src/compiler/nir/tests/negative_equal_tests.cpp -@@ -36,6 +36,7 @@ protected: - const_value_negative_equal_test() - { - glsl_type_singleton_init_or_ref(); -+ util_cpu_detect(); - - memset(c1, 0, sizeof(c1)); - memset(c2, 0, sizeof(c2)); -@@ -55,6 +56,7 @@ protected: - alu_srcs_negative_equal_test() - { - glsl_type_singleton_init_or_ref(); -+ util_cpu_detect(); - - static const nir_shader_compiler_options options = { }; - bld = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options, -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c -index 165d73d94fc..33269e528fe 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c -@@ -104,13 +104,13 @@ lp_build_min_simple(struct lp_build_context *bld, - - /* TODO: optimize the constant case */ - -- if (type.floating && util_cpu_caps.has_sse) { -+ if (type.floating && util_get_cpu_caps()->has_sse) { - if (type.width == 32) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse.min.ss"; - intr_size = 128; - } -- else if (type.length <= 4 || !util_cpu_caps.has_avx) { -+ else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse.min.ps"; - intr_size = 128; - } -@@ -119,12 +119,12 @@ lp_build_min_simple(struct lp_build_context *bld, - intr_size = 256; - } - } -- if (type.width == 64 && util_cpu_caps.has_sse2) { -+ if (type.width == 64 && util_get_cpu_caps()->has_sse2) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse2.min.sd"; - intr_size = 128; - } -- else if (type.length == 2 || !util_cpu_caps.has_avx) { -+ else if (type.length == 2 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse2.min.pd"; - intr_size = 128; - } -@@ -134,7 +134,7 @@ lp_build_min_simple(struct lp_build_context *bld, - } - } - } -- else if (type.floating && util_cpu_caps.has_altivec) { -+ else if (type.floating && util_get_cpu_caps()->has_altivec) { - if (nan_behavior == GALLIVM_NAN_RETURN_NAN || - nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { - debug_printf("%s: altivec doesn't support nan return nan behavior\n", -@@ -144,7 +144,7 @@ lp_build_min_simple(struct lp_build_context *bld, - intrinsic = "llvm.ppc.altivec.vminfp"; - intr_size = 128; - } -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intr_size = 128; - if (type.width == 8) { - if (!type.sign) { -@@ -174,7 +174,7 @@ lp_build_min_simple(struct lp_build_context *bld, - * The sse intrinsics return the second operator in case of nan by - * default so we need to special code to handle those. - */ -- if (util_cpu_caps.has_sse && type.floating && -+ if (util_get_cpu_caps()->has_sse && type.floating && - nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN && - nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { -@@ -274,13 +274,13 @@ lp_build_max_simple(struct lp_build_context *bld, - - /* TODO: optimize the constant case */ - -- if (type.floating && util_cpu_caps.has_sse) { -+ if (type.floating && util_get_cpu_caps()->has_sse) { - if (type.width == 32) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse.max.ss"; - intr_size = 128; - } -- else if (type.length <= 4 || !util_cpu_caps.has_avx) { -+ else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse.max.ps"; - intr_size = 128; - } -@@ -289,12 +289,12 @@ lp_build_max_simple(struct lp_build_context *bld, - intr_size = 256; - } - } -- if (type.width == 64 && util_cpu_caps.has_sse2) { -+ if (type.width == 64 && util_get_cpu_caps()->has_sse2) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse2.max.sd"; - intr_size = 128; - } -- else if (type.length == 2 || !util_cpu_caps.has_avx) { -+ else if (type.length == 2 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse2.max.pd"; - intr_size = 128; - } -@@ -304,7 +304,7 @@ lp_build_max_simple(struct lp_build_context *bld, - } - } - } -- else if (type.floating && util_cpu_caps.has_altivec) { -+ else if (type.floating && util_get_cpu_caps()->has_altivec) { - if (nan_behavior == GALLIVM_NAN_RETURN_NAN || - nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { - debug_printf("%s: altivec doesn't support nan return nan behavior\n", -@@ -314,7 +314,7 @@ lp_build_max_simple(struct lp_build_context *bld, - intrinsic = "llvm.ppc.altivec.vmaxfp"; - intr_size = 128; - } -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intr_size = 128; - if (type.width == 8) { - if (!type.sign) { -@@ -338,7 +338,7 @@ lp_build_max_simple(struct lp_build_context *bld, - } - - if (intrinsic) { -- if (util_cpu_caps.has_sse && type.floating && -+ if (util_get_cpu_caps()->has_sse && type.floating && - nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN && - nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { -@@ -472,12 +472,12 @@ lp_build_add(struct lp_build_context *bld, - return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b); - } - if (type.width * type.length == 128) { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; - if (type.width == 16) -@@ -485,7 +485,7 @@ lp_build_add(struct lp_build_context *bld, - } - } - if (type.width * type.length == 256) { -- if (util_cpu_caps.has_avx2) { -+ if (util_get_cpu_caps()->has_avx2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b"; - if (type.width == 16) -@@ -713,11 +713,11 @@ lp_build_hadd_partial4(struct lp_build_context *bld, - tmp[2] = num_vecs > 2 ? vectors[2] : vectors[0]; - tmp[3] = num_vecs > 3 ? vectors[3] : vectors[0]; - -- if (util_cpu_caps.has_sse3 && bld->type.width == 32 && -+ if (util_get_cpu_caps()->has_sse3 && bld->type.width == 32 && - bld->type.length == 4) { - intrinsic = "llvm.x86.sse3.hadd.ps"; - } -- else if (util_cpu_caps.has_avx && bld->type.width == 32 && -+ else if (util_get_cpu_caps()->has_avx && bld->type.width == 32 && - bld->type.length == 8) { - intrinsic = "llvm.x86.avx.hadd.ps.256"; - } -@@ -796,12 +796,12 @@ lp_build_sub(struct lp_build_context *bld, - return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b); - } - if (type.width * type.length == 128) { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; - if (type.width == 16) -@@ -809,7 +809,7 @@ lp_build_sub(struct lp_build_context *bld, - } - } - if (type.width * type.length == 256) { -- if (util_cpu_caps.has_avx2) { -+ if (util_get_cpu_caps()->has_avx2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b"; - if (type.width == 16) -@@ -1078,8 +1078,8 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld, - */ - if (LLVM_VERSION_MAJOR < 7 && - (bld->type.length == 4 || bld->type.length == 8) && -- ((util_cpu_caps.has_sse2 && (bld->type.sign == 0)) || -- util_cpu_caps.has_sse4_1)) { -+ ((util_get_cpu_caps()->has_sse2 && (bld->type.sign == 0)) || -+ util_get_cpu_caps()->has_sse4_1)) { - const char *intrinsic = NULL; - LLVMValueRef aeven, aodd, beven, bodd, muleven, mulodd; - LLVMValueRef shuf[LP_MAX_VECTOR_WIDTH / 32], shuf_vec; -@@ -1096,7 +1096,7 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld, - aodd = LLVMBuildShuffleVector(builder, aeven, bld->undef, shuf_vec, ""); - bodd = LLVMBuildShuffleVector(builder, beven, bld->undef, shuf_vec, ""); - -- if (util_cpu_caps.has_avx2 && bld->type.length == 8) { -+ if (util_get_cpu_caps()->has_avx2 && bld->type.length == 8) { - if (bld->type.sign) { - intrinsic = "llvm.x86.avx2.pmul.dq"; - } else { -@@ -1331,8 +1331,8 @@ lp_build_div(struct lp_build_context *bld, - - /* fast rcp is disabled (just uses div), so makes no sense to try that */ - if(FALSE && -- ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) && -+ ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) || -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) && - type.floating) - return lp_build_mul(bld, a, lp_build_rcp(bld, b)); - -@@ -1745,7 +1745,7 @@ lp_build_abs(struct lp_build_context *bld, - return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a); - } - -- if(type.width*type.length == 128 && util_cpu_caps.has_ssse3 && LLVM_VERSION_MAJOR < 6) { -+ if(type.width*type.length == 128 && util_get_cpu_caps()->has_ssse3 && LLVM_VERSION_MAJOR < 6) { - switch(type.width) { - case 8: - return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); -@@ -1755,7 +1755,7 @@ lp_build_abs(struct lp_build_context *bld, - return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); - } - } -- else if (type.width*type.length == 256 && util_cpu_caps.has_avx2 && LLVM_VERSION_MAJOR < 6) { -+ else if (type.width*type.length == 256 && util_get_cpu_caps()->has_avx2 && LLVM_VERSION_MAJOR < 6) { - switch(type.width) { - case 8: - return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a); -@@ -1897,15 +1897,15 @@ lp_build_int_to_float(struct lp_build_context *bld, - static boolean - arch_rounding_available(const struct lp_type type) - { -- if ((util_cpu_caps.has_sse4_1 && -+ if ((util_get_cpu_caps()->has_sse4_1 && - (type.length == 1 || type.width*type.length == 128)) || -- (util_cpu_caps.has_avx && type.width*type.length == 256) || -- (util_cpu_caps.has_avx512f && type.width*type.length == 512)) -+ (util_get_cpu_caps()->has_avx && type.width*type.length == 256) || -+ (util_get_cpu_caps()->has_avx512f && type.width*type.length == 512)) - return TRUE; -- else if ((util_cpu_caps.has_altivec && -+ else if ((util_get_cpu_caps()->has_altivec && - (type.width == 32 && type.length == 4))) - return TRUE; -- else if (util_cpu_caps.has_neon) -+ else if (util_get_cpu_caps()->has_neon) - return TRUE; - - return FALSE; -@@ -1935,7 +1935,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld, - assert(type.width == 32); - - assert(lp_check_value(type, a)); -- assert(util_cpu_caps.has_sse2); -+ assert(util_get_cpu_caps()->has_sse2); - - /* This is relying on MXCSR rounding mode, which should always be nearest. */ - if (type.length == 1) { -@@ -1961,7 +1961,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld, - } - else { - assert(type.width*type.length == 256); -- assert(util_cpu_caps.has_avx); -+ assert(util_get_cpu_caps()->has_avx); - - intrinsic = "llvm.x86.avx.cvt.ps2dq.256"; - } -@@ -1987,7 +1987,7 @@ lp_build_round_altivec(struct lp_build_context *bld, - assert(type.floating); - - assert(lp_check_value(type, a)); -- assert(util_cpu_caps.has_altivec); -+ assert(util_get_cpu_caps()->has_altivec); - - (void)type; - -@@ -2014,7 +2014,7 @@ lp_build_round_arch(struct lp_build_context *bld, - LLVMValueRef a, - enum lp_build_round_mode mode) - { -- if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) { -+ if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon) { - LLVMBuilderRef builder = bld->gallivm->builder; - const struct lp_type type = bld->type; - const char *intrinsic_root; -@@ -2042,7 +2042,7 @@ lp_build_round_arch(struct lp_build_context *bld, - lp_format_intrinsic(intrinsic, sizeof intrinsic, intrinsic_root, bld->vec_type); - return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); - } -- else /* (util_cpu_caps.has_altivec) */ -+ else /* (util_get_cpu_caps()->has_altivec) */ - return lp_build_round_altivec(bld, a, mode); - } - -@@ -2377,9 +2377,9 @@ lp_build_iround(struct lp_build_context *bld, - - assert(lp_check_value(type, a)); - -- if ((util_cpu_caps.has_sse2 && -+ if ((util_get_cpu_caps()->has_sse2 && - ((type.width == 32) && (type.length == 1 || type.length == 4))) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) { -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) { - return lp_build_iround_nearest_sse2(bld, a); - } - if (arch_rounding_available(type)) { -@@ -2664,8 +2664,8 @@ lp_build_rcp(struct lp_build_context *bld, - * particular uses that require less workarounds. - */ - -- if (FALSE && ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8))){ -+ if (FALSE && ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) || -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8))){ - const unsigned num_iterations = 0; - LLVMValueRef res; - unsigned i; -@@ -2784,8 +2784,8 @@ lp_build_fast_rsqrt_available(struct lp_type type) - { - assert(type.floating); - -- if ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) { -+ if ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) || -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) { - return true; - } - return false; -@@ -3694,7 +3694,7 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm, - LLVMValueRef - lp_build_fpstate_get(struct gallivm_state *gallivm) - { -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef mxcsr_ptr = lp_build_alloca( - gallivm, -@@ -3715,7 +3715,7 @@ void - lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, - boolean zero) - { -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - /* turn on DAZ (64) | FTZ (32768) = 32832 if available */ - int daz_ftz = _MM_FLUSH_ZERO_MASK; - -@@ -3724,7 +3724,7 @@ lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, - LLVMValueRef mxcsr = - LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr"); - -- if (util_cpu_caps.has_daz) { -+ if (util_get_cpu_caps()->has_daz) { - /* Enable denormals are zero mode */ - daz_ftz |= _MM_DENORMALS_ZERO_MASK; - } -@@ -3745,7 +3745,7 @@ void - lp_build_fpstate_set(struct gallivm_state *gallivm, - LLVMValueRef mxcsr_ptr) - { -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - LLVMBuilderRef builder = gallivm->builder; - mxcsr_ptr = LLVMBuildPointerCast(builder, mxcsr_ptr, - LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), ""); -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c -index 31affad2233..1c050535301 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c -@@ -110,7 +110,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm, - LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type); - LLVMValueRef h; - -- if (util_cpu_caps.has_f16c && -+ if (util_get_cpu_caps()->has_f16c && - (src_length == 4 || src_length == 8)) { - if (LLVM_VERSION_MAJOR < 11) { - const char *intrinsic = NULL; -@@ -176,7 +176,7 @@ lp_build_float_to_half(struct gallivm_state *gallivm, - * useless. - */ - -- if (util_cpu_caps.has_f16c && -+ if (util_get_cpu_caps()->has_f16c && - (length == 4 || length == 8)) { - struct lp_type i168_type = lp_type_int_vec(16, 16 * 8); - unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */ -@@ -498,7 +498,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm, - - /* Special case 4x4x32 --> 1x16x8 */ - if (src_type.length == 4 && -- (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec)) -+ (util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec)) - { - num_dsts = (num_srcs + 3) / 4; - dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4; -@@ -509,7 +509,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm, - - /* Special case 2x8x32 --> 1x16x8 */ - if (src_type.length == 8 && -- util_cpu_caps.has_avx) -+ util_get_cpu_caps()->has_avx) - { - num_dsts = (num_srcs + 1) / 2; - dst_type->length = num_srcs * 8 >= 16 ? 16 : num_srcs * 8; -@@ -606,7 +606,7 @@ lp_build_conv(struct gallivm_state *gallivm, - ((dst_type.length == 16 && 4 * num_dsts == num_srcs) || - (num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) && - -- (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec)) -+ (util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec)) - { - struct lp_build_context bld; - struct lp_type int16_type, int32_type; -@@ -719,7 +719,7 @@ lp_build_conv(struct gallivm_state *gallivm, - ((dst_type.length == 16 && 2 * num_dsts == num_srcs) || - (num_dsts == 1 && dst_type.length * num_srcs == 8)) && - -- util_cpu_caps.has_avx) { -+ util_get_cpu_caps()->has_avx) { - - struct lp_build_context bld; - struct lp_type int16_type, int32_type; -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c -index 174857e06d9..e17c7881e7d 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c -@@ -642,8 +642,8 @@ s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm, - * XXX with sse2 and 16x8 vectors, should use pavgb even when n == 1. - * Much cheaper (but we don't care that much if n == 1). - */ -- if ((util_cpu_caps.has_sse2 && n == 4) || -- (util_cpu_caps.has_avx2 && n == 8)) { -+ if ((util_get_cpu_caps()->has_sse2 && n == 4) || -+ (util_get_cpu_caps()->has_avx2 && n == 8)) { - color2_2 = lp_build_pavgb(&bld8, colors0, colors1); - color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, ""); - } -@@ -1350,7 +1350,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm, - if (is_dxt1_variant) { - LLVMValueRef color23_2, color2_2; - -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - LLVMValueRef intrargs[2]; - intrargs[0] = LLVMBuildBitCast(builder, color01, bld8.vec_type, ""); - /* same interleave as for lerp23 - correct result in 2nd element */ -@@ -1389,7 +1389,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm, - color23 = lp_build_select(&bld32, sel_mask, color23, color23_2); - } - -- if (util_cpu_caps.has_ssse3) { -+ if (util_get_cpu_caps()->has_ssse3) { - /* - * Use pshufb as mini-lut. (Only doable with intrinsics as the - * final shuffles are non-constant. pshufb is awesome!) -@@ -1689,7 +1689,7 @@ s3tc_decode_block_dxt5(struct gallivm_state *gallivm, - type16.sign = FALSE; - sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, ""); - -- if (!util_cpu_caps.has_ssse3) { -+ if (!util_get_cpu_caps()->has_ssse3) { - LLVMValueRef acodeg, mask1, acode0, acode1; - - /* extraction of the 3 bit values into something more useful is HARD */ -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c -index 121452d7596..97deffe1de0 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c -@@ -90,7 +90,7 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm, - * per element. Didn't measure performance but cuts shader size - * by quite a bit (less difference if cpu has no sse4.1 support). - */ -- if (util_cpu_caps.has_sse2 && n > 1) { -+ if (util_get_cpu_caps()->has_sse2 && n > 1) { - LLVMValueRef sel, tmp, tmp2; - struct lp_build_context bld32; - -@@ -174,7 +174,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm, - * per element. Didn't measure performance but cuts shader size - * by quite a bit (less difference if cpu has no sse4.1 support). - */ -- if (util_cpu_caps.has_sse2 && n > 1) { -+ if (util_get_cpu_caps()->has_sse2 && n > 1) { - LLVMValueRef sel, tmp; - struct lp_build_context bld32; - -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c -index e991b0dc375..42cc17371a0 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c -@@ -488,7 +488,7 @@ lp_build_gather(struct gallivm_state *gallivm, - * 32bit/64bit fetches you're doing it wrong (this is gather, not - * conversion) and it would be awkward for floats. - */ -- } else if (util_cpu_caps.has_avx2 && !need_expansion && -+ } else if (util_get_cpu_caps()->has_avx2 && !need_expansion && - src_width == 32 && (length == 4 || length == 8)) { - return lp_build_gather_avx2(gallivm, length, src_width, dst_type, - base_ptr, offsets); -@@ -500,7 +500,7 @@ lp_build_gather(struct gallivm_state *gallivm, - * (In general, should be more of a win if the fetch is 256bit wide - - * this is true for the 32bit case above too.) - */ -- } else if (0 && util_cpu_caps.has_avx2 && !need_expansion && -+ } else if (0 && util_get_cpu_caps()->has_avx2 && !need_expansion && - src_width == 64 && (length == 2 || length == 4)) { - return lp_build_gather_avx2(gallivm, length, src_width, dst_type, - base_ptr, offsets); -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c -index 685ed0e58aa..dd428242cb9 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_init.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c -@@ -433,6 +433,7 @@ lp_build_init(void) - /* For simulating less capable machines */ - #ifdef DEBUG - if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) { -+ extern struct util_cpu_caps_t util_cpu_caps; - assert(util_cpu_caps.has_sse2); - util_cpu_caps.has_sse3 = 0; - util_cpu_caps.has_ssse3 = 0; -@@ -445,7 +446,7 @@ lp_build_init(void) - } - #endif - -- if (util_cpu_caps.has_avx2 || util_cpu_caps.has_avx) { -+ if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) { - lp_native_vector_width = 256; - } else { - /* Leave it at 128, even when no SIMD extensions are available. -@@ -460,16 +461,16 @@ lp_build_init(void) - #if LLVM_VERSION_MAJOR < 4 - if (lp_native_vector_width <= 128) { - /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by -- * "util_cpu_caps.has_avx" predicate, and lack the -+ * "util_get_cpu_caps()->has_avx" predicate, and lack the - * "lp_native_vector_width > 128" predicate. And also to ensure a more - * consistent behavior, allowing one to test SSE2 on AVX machines. - * XXX: should not play games with util_cpu_caps directly as it might - * get used for other things outside llvm too. - */ -- util_cpu_caps.has_avx = 0; -- util_cpu_caps.has_avx2 = 0; -- util_cpu_caps.has_f16c = 0; -- util_cpu_caps.has_fma = 0; -+ util_get_cpu_caps()->has_avx = 0; -+ util_get_cpu_caps()->has_avx2 = 0; -+ util_get_cpu_caps()->has_f16c = 0; -+ util_get_cpu_caps()->has_fma = 0; - } - #endif - -@@ -482,7 +483,7 @@ lp_build_init(void) - * Right now denorms get explicitly disabled (but elsewhere) for x86, - * whereas ppc64 explicitly enables them... - */ -- if (util_cpu_caps.has_altivec) { -+ if (util_get_cpu_caps()->has_altivec) { - unsigned short mask[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF, 0xFFFE, 0xFFFF }; - __asm ( -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c -index 315977ae745..3ed3b5a74b1 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c -@@ -196,7 +196,7 @@ lp_build_compare(struct gallivm_state *gallivm, - - if (!type.floating && !type.sign && - type.width * type.length == 128 && -- util_cpu_caps.has_sse2 && -+ util_get_cpu_caps()->has_sse2 && - (func == PIPE_FUNC_LESS || - func == PIPE_FUNC_LEQUAL || - func == PIPE_FUNC_GREATER || -@@ -348,11 +348,11 @@ lp_build_select(struct lp_build_context *bld, - - res = LLVMBuildSelect(builder, mask, a, b, ""); - } -- else if (((util_cpu_caps.has_sse4_1 && -+ else if (((util_get_cpu_caps()->has_sse4_1 && - type.width * type.length == 128) || -- (util_cpu_caps.has_avx && -+ (util_get_cpu_caps()->has_avx && - type.width * type.length == 256 && type.width >= 32) || -- (util_cpu_caps.has_avx2 && -+ (util_get_cpu_caps()->has_avx2 && - type.width * type.length == 256)) && - !LLVMIsConstant(a) && - !LLVMIsConstant(b) && -@@ -379,7 +379,7 @@ lp_build_select(struct lp_build_context *bld, - intrinsic = "llvm.x86.avx.blendv.ps.256"; - arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); - } else { -- assert(util_cpu_caps.has_avx2); -+ assert(util_get_cpu_caps()->has_avx2); - intrinsic = "llvm.x86.avx2.pblendvb"; - arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32); - } -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp -index 9b75676a4e2..4f3e696816c 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp -+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp -@@ -400,22 +400,22 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - * http://llvm.org/PR19429 - * http://llvm.org/PR16721 - */ -- MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" ); -- MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" ); -- MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" ); -- MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" ); -- MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1"); -- MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2"); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse ? "+sse" : "-sse" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse2 ? "+sse2" : "-sse2" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse3 ? "+sse3" : "-sse3" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_ssse3 ? "+ssse3" : "-ssse3" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse4_1 ? "+sse4.1" : "-sse4.1"); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse4_2 ? "+sse4.2" : "-sse4.2"); - /* - * AVX feature is not automatically detected from CPUID by the X86 target - * yet, because the old (yet default) JIT engine is not capable of - * emitting the opcodes. On newer llvm versions it is and at least some - * versions (tested with 3.3) will emit avx opcodes without this anyway. - */ -- MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx"); -- MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c"); -- MAttrs.push_back(util_cpu_caps.has_fma ? "+fma" : "-fma"); -- MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2"); -+ MAttrs.push_back(util_get_cpu_caps()->has_avx ? "+avx" : "-avx"); -+ MAttrs.push_back(util_get_cpu_caps()->has_f16c ? "+f16c" : "-f16c"); -+ MAttrs.push_back(util_get_cpu_caps()->has_fma ? "+fma" : "-fma"); -+ MAttrs.push_back(util_get_cpu_caps()->has_avx2 ? "+avx2" : "-avx2"); - /* disable avx512 and all subvariants */ - MAttrs.push_back("-avx512cd"); - MAttrs.push_back("-avx512er"); -@@ -426,7 +426,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - MAttrs.push_back("-avx512vl"); - #endif - #if defined(PIPE_ARCH_ARM) -- if (!util_cpu_caps.has_neon) { -+ if (!util_get_cpu_caps()->has_neon) { - MAttrs.push_back("-neon"); - MAttrs.push_back("-crypto"); - MAttrs.push_back("-vfp2"); -@@ -434,7 +434,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - #endif - - #if defined(PIPE_ARCH_PPC) -- MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); -+ MAttrs.push_back(util_get_cpu_caps()->has_altivec ? "+altivec" : "-altivec"); - #if (LLVM_VERSION_MAJOR < 4) - /* - * Make sure VSX instructions are disabled -@@ -444,7 +444,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0) - * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0) - */ -- if (util_cpu_caps.has_altivec) { -+ if (util_get_cpu_caps()->has_altivec) { - MAttrs.push_back("-vsx"); - } - #else -@@ -458,8 +458,8 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - * Make sure VSX instructions are ENABLED (if supported), unless - * VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0. - */ -- if (util_cpu_caps.has_altivec) { -- MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx"); -+ if (util_get_cpu_caps()->has_altivec) { -+ MAttrs.push_back(util_get_cpu_caps()->has_vsx ? "+vsx" : "-vsx"); - } - #endif - #endif -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c -index e1f652a9342..76e57c52f80 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c -@@ -322,7 +322,7 @@ lp_build_interleave2(struct gallivm_state *gallivm, - { - LLVMValueRef shuffle; - -- if (type.length == 2 && type.width == 128 && util_cpu_caps.has_avx) { -+ if (type.length == 2 && type.width == 128 && util_get_cpu_caps()->has_avx) { - /* - * XXX: This is a workaround for llvm code generation deficiency. Strangely - * enough, while this needs vinsertf128/vextractf128 instructions (hence -@@ -484,7 +484,7 @@ lp_build_unpack2_native(struct gallivm_state *gallivm, - - /* Interleave bits */ - #if UTIL_ARCH_LITTLE_ENDIAN -- if (src_type.length * src_type.width == 256 && util_cpu_caps.has_avx2) { -+ if (src_type.length * src_type.width == 256 && util_get_cpu_caps()->has_avx2) { - *dst_lo = lp_build_interleave2_half(gallivm, src_type, src, msb, 0); - *dst_hi = lp_build_interleave2_half(gallivm, src_type, src, msb, 1); - } else { -@@ -585,22 +585,22 @@ lp_build_pack2(struct gallivm_state *gallivm, - assert(src_type.length * 2 == dst_type.length); - - /* Check for special cases first */ -- if ((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) && -+ if ((util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec) && - src_type.width * src_type.length >= 128) { - const char *intrinsic = NULL; - boolean swap_intrinsic_operands = FALSE; - - switch(src_type.width) { - case 32: -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - if (dst_type.sign) { - intrinsic = "llvm.x86.sse2.packssdw.128"; - } else { -- if (util_cpu_caps.has_sse4_1) { -+ if (util_get_cpu_caps()->has_sse4_1) { - intrinsic = "llvm.x86.sse41.packusdw"; - } - } -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - if (dst_type.sign) { - intrinsic = "llvm.ppc.altivec.vpkswss"; - } else { -@@ -613,18 +613,18 @@ lp_build_pack2(struct gallivm_state *gallivm, - break; - case 16: - if (dst_type.sign) { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - intrinsic = "llvm.x86.sse2.packsswb.128"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intrinsic = "llvm.ppc.altivec.vpkshss"; - #if UTIL_ARCH_LITTLE_ENDIAN - swap_intrinsic_operands = TRUE; - #endif - } - } else { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - intrinsic = "llvm.x86.sse2.packuswb.128"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intrinsic = "llvm.ppc.altivec.vpkshus"; - #if UTIL_ARCH_LITTLE_ENDIAN - swap_intrinsic_operands = TRUE; -@@ -740,7 +740,7 @@ lp_build_pack2_native(struct gallivm_state *gallivm, - - /* At this point only have special case for avx2 */ - if (src_type.length * src_type.width == 256 && -- util_cpu_caps.has_avx2) { -+ util_get_cpu_caps()->has_avx2) { - switch(src_type.width) { - case 32: - if (dst_type.sign) { -@@ -793,7 +793,7 @@ lp_build_packs2(struct gallivm_state *gallivm, - - /* All X86 SSE non-interleaved pack instructions take signed inputs and - * saturate them, so no need to clamp for those cases. */ -- if(util_cpu_caps.has_sse2 && -+ if(util_get_cpu_caps()->has_sse2 && - src_type.width * src_type.length >= 128 && - src_type.sign && - (src_type.width == 32 || src_type.width == 16)) -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c -index 686abc08620..98dcde912b5 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c -@@ -1152,7 +1152,7 @@ lp_build_minify(struct lp_build_context *bld, - LLVMValueRef size; - assert(bld->type.sign); - if (lod_scalar || -- (util_cpu_caps.has_avx2 || !util_cpu_caps.has_sse)) { -+ (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) { - size = LLVMBuildLShr(builder, base_size, level, "minify"); - size = lp_build_max(bld, size, bld->one); - } -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c -index 6740907ebcb..f35a27562e7 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c -@@ -3235,7 +3235,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, - * as it appears to be a loss with just AVX) - */ - if (num_quads == 1 || !use_aos || -- (util_cpu_caps.has_avx2 && -+ (util_get_cpu_caps()->has_avx2 && - (bld.num_lods == 1 || - derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) { - if (use_aos) { -diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c -index b1c8b990ef1..03b11f914b4 100644 ---- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c -+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c -@@ -35,10 +35,10 @@ - - DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", false); - --static struct util_cpu_caps *get_cpu_caps(void) -+static const struct util_cpu_caps_t *get_cpu_caps(void) - { - util_cpu_detect(); -- return &util_cpu_caps; -+ return util_get_cpu_caps(); - } - - int rtasm_cpu_has_sse(void) -diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c -index ad687f32853..ddd65fb6a08 100644 ---- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c -+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c -@@ -2152,17 +2152,17 @@ static void x86_init_func_common( struct x86_function *p ) - { - util_cpu_detect(); - p->caps = 0; -- if(util_cpu_caps.has_mmx) -+ if(util_get_cpu_caps()->has_mmx) - p->caps |= X86_MMX; -- if(util_cpu_caps.has_mmx2) -+ if(util_get_cpu_caps()->has_mmx2) - p->caps |= X86_MMX2; -- if(util_cpu_caps.has_sse) -+ if(util_get_cpu_caps()->has_sse) - p->caps |= X86_SSE; -- if(util_cpu_caps.has_sse2) -+ if(util_get_cpu_caps()->has_sse2) - p->caps |= X86_SSE2; -- if(util_cpu_caps.has_sse3) -+ if(util_get_cpu_caps()->has_sse3) - p->caps |= X86_SSE3; -- if(util_cpu_caps.has_sse4_1) -+ if(util_get_cpu_caps()->has_sse4_1) - p->caps |= X86_SSE4_1; - p->csr = p->store; - #if defined(PIPE_ARCH_X86) -diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c -index 7d5150528bc..28d893a8d82 100644 ---- a/src/gallium/auxiliary/util/u_threaded_context.c -+++ b/src/gallium/auxiliary/util/u_threaded_context.c -@@ -2172,8 +2172,8 @@ tc_set_context_param(struct pipe_context *_pipe, - if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) { - /* Pin the gallium thread as requested. */ - util_set_thread_affinity(tc->queue.threads[0], -- util_cpu_caps.L3_affinity_mask[value], -- NULL, util_cpu_caps.num_cpu_mask_bits); -+ util_get_cpu_caps()->L3_affinity_mask[value], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); - - /* Execute this immediately (without enqueuing). - * It's required to be thread-safe. -@@ -2951,7 +2951,7 @@ threaded_context_create(struct pipe_context *pipe, - - util_cpu_detect(); - -- if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1)) -+ if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1)) - return pipe; - - tc = os_malloc_aligned(sizeof(struct threaded_context), 16); -diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c -index b95e2f0017f..dc559bc3ffb 100644 ---- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c -+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c -@@ -436,7 +436,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, - assert(type.length <= 16); - assert(type.floating); - -- if(util_cpu_caps.has_sse && type.length == 4) { -+ if(util_get_cpu_caps()->has_sse && type.length == 4) { - const char *movmskintr = "llvm.x86.sse.movmsk.ps"; - const char *popcntintr = "llvm.ctpop.i32"; - LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, -@@ -447,7 +447,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, - LLVMInt32TypeInContext(context), bits); - count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); - } -- else if(util_cpu_caps.has_avx && type.length == 8) { -+ else if(util_get_cpu_caps()->has_avx && type.length == 8) { - const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; - const char *popcntintr = "llvm.ctpop.i32"; - LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, -diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c -index 0ba4b48c469..6447a67634b 100644 ---- a/src/gallium/drivers/llvmpipe/lp_screen.c -+++ b/src/gallium/drivers/llvmpipe/lp_screen.c -@@ -919,7 +919,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys) - - screen->allow_cl = !!getenv("LP_CL"); - screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR); -- screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; -+ screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? util_get_cpu_caps()->nr_cpus : 0; - #ifdef EMBEDDED_DEVICE - screen->num_threads = 0; - #endif -diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c -index c602d001f2f..db330eebb8e 100644 ---- a/src/gallium/drivers/llvmpipe/lp_test_arit.c -+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c -@@ -403,7 +403,7 @@ flush_denorm_to_zero(float val) - fi_val.f = val; - - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - if ((fi_val.ui & 0x7f800000) == 0) { - fi_val.ui &= 0xff800000; - } -@@ -479,7 +479,7 @@ test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test, unsigned - continue; - } - -- if (!util_cpu_caps.has_neon && -+ if (!util_get_cpu_caps()->has_neon && - test->ref == &nearbyintf && length == 2 && - ref != roundf(testval)) { - /* FIXME: The generic (non SSE) path in lp_build_iround, which is -diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c -index 2bf223d66f9..815736166d5 100644 ---- a/src/gallium/drivers/llvmpipe/lp_texture.c -+++ b/src/gallium/drivers/llvmpipe/lp_texture.c -@@ -85,7 +85,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, - * of a block for all formats) though this should not be strictly necessary - * neither. In any case it can only affect compressed or 1d textures. - */ -- unsigned mip_align = MAX2(64, util_cpu_caps.cacheline); -+ unsigned mip_align = MAX2(64, util_get_cpu_caps()->cacheline); - - assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS); - assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS); -@@ -123,7 +123,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, - if (util_format_is_compressed(pt->format)) - lpr->row_stride[level] = nblocksx * block_size; - else -- lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline); -+ lpr->row_stride[level] = align(nblocksx * block_size, util_get_cpu_caps()->cacheline); - - /* if row_stride * height > LP_MAX_TEXTURE_SIZE */ - if ((uint64_t)lpr->row_stride[level] * nblocksy > LP_MAX_TEXTURE_SIZE) { -diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp -index 84d4b44ee57..1fb14e636d7 100644 ---- a/src/gallium/drivers/swr/swr_loader.cpp -+++ b/src/gallium/drivers/swr/swr_loader.cpp -@@ -91,7 +91,7 @@ swr_create_screen(struct sw_winsys *winsys) - - util_cpu_detect(); - -- if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) { -+ if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512er) { - swr_print_info("SWR detected KNL instruction support "); - #ifndef HAVE_SWR_KNL - swr_print_info("(skipping: not built).\n"); -@@ -103,7 +103,7 @@ swr_create_screen(struct sw_winsys *winsys) - #endif - } - -- if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) { -+ if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512bw) { - swr_print_info("SWR detected SKX instruction support "); - #ifndef HAVE_SWR_SKX - swr_print_info("(skipping not built).\n"); -@@ -113,7 +113,7 @@ swr_create_screen(struct sw_winsys *winsys) - #endif - } - -- if (util_cpu_caps.has_avx2) { -+ if (util_get_cpu_caps()->has_avx2) { - swr_print_info("SWR detected AVX2 instruction support "); - #ifndef HAVE_SWR_AVX2 - swr_print_info("(skipping not built).\n"); -@@ -123,7 +123,7 @@ swr_create_screen(struct sw_winsys *winsys) - #endif - } - -- if (util_cpu_caps.has_avx) { -+ if (util_get_cpu_caps()->has_avx) { - swr_print_info("SWR detected AVX instruction support "); - #ifndef HAVE_SWR_AVX - swr_print_info("(skipping not built).\n"); -diff --git a/src/gallium/drivers/vc4/vc4_tiling.h b/src/gallium/drivers/vc4/vc4_tiling.h -index 66767e7f1f8..5afe32939a8 100644 ---- a/src/gallium/drivers/vc4/vc4_tiling.h -+++ b/src/gallium/drivers/vc4/vc4_tiling.h -@@ -90,7 +90,7 @@ vc4_load_lt_image(void *dst, uint32_t dst_stride, - int cpp, const struct pipe_box *box) - { - #ifdef USE_ARM_ASM -- if (util_cpu_caps.has_neon) { -+ if (util_get_cpu_caps()->has_neon) { - vc4_load_lt_image_neon(dst, dst_stride, src, src_stride, - cpp, box); - return; -@@ -106,7 +106,7 @@ vc4_store_lt_image(void *dst, uint32_t dst_stride, - int cpp, const struct pipe_box *box) - { - #ifdef USE_ARM_ASM -- if (util_cpu_caps.has_neon) { -+ if (util_get_cpu_caps()->has_neon) { - vc4_store_lt_image_neon(dst, dst_stride, src, src_stride, - cpp, box); - return; -diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c -index 4d9c4e27ebf..782f16e7f78 100644 ---- a/src/gallium/tests/unit/translate_test.c -+++ b/src/gallium/tests/unit/translate_test.c -@@ -50,6 +50,7 @@ int main(int argc, char** argv) - { - struct translate *(*create_fn)(const struct translate_key *key) = 0; - -+ extern struct util_cpu_caps_t util_cpu_caps; - struct translate_key key; - unsigned output_format; - unsigned input_format; -@@ -87,7 +88,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse")) - { -- if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE (test with qemu)\n"); - return 2; -@@ -99,7 +100,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse2")) - { -- if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse2 || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE2 (test with qemu)\n"); - return 2; -@@ -110,7 +111,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse3")) - { -- if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse3 || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE3 (test with qemu)\n"); - return 2; -@@ -120,7 +121,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse4.1")) - { -- if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse4_1 || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n"); - return 2; -diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c -index 7f2eba9382b..4474cfb82b0 100644 ---- a/src/gallium/tests/unit/u_half_test.c -+++ b/src/gallium/tests/unit/u_half_test.c -@@ -36,13 +36,14 @@ test(void) - int - main(int argc, char **argv) - { -- assert(!util_cpu_caps.has_f16c); -+ util_cpu_detect(); - test(); - -- /* Test f16c. */ -- util_cpu_detect(); -- if (util_cpu_caps.has_f16c) -+ /* Test non-f16c. */ -+ if (util_get_cpu_caps()->has_f16c) { -+ ((struct util_cpu_caps_t *)util_get_cpu_caps())->has_f16c = false; - test(); -+ } - - printf("Success!\n"); - return 0; -diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c -index 6b87601f1be..9b3279fc461 100644 ---- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c -+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c -@@ -327,8 +327,8 @@ static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws, - struct amdgpu_winsys *ws = amdgpu_winsys(rws); - - util_set_thread_affinity(ws->cs_queue.threads[0], -- util_cpu_caps.L3_affinity_mask[cache], -- NULL, util_cpu_caps.num_cpu_mask_bits); -+ util_get_cpu_caps()->L3_affinity_mask[cache], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); - } - - static uint32_t kms_handle_hash(const void *key) -diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c -index 6b306a6ce7b..91b1dd6b68f 100644 ---- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c -+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c -@@ -804,8 +804,8 @@ static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws, - - if (util_queue_is_initialized(&rws->cs_queue)) { - util_set_thread_affinity(rws->cs_queue.threads[0], -- util_cpu_caps.L3_affinity_mask[cache], -- NULL, util_cpu_caps.num_cpu_mask_bits); -+ util_get_cpu_caps()->L3_affinity_mask[cache], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); - } - } - -diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c -index 6316cad4e32..ed019b55395 100644 ---- a/src/mesa/main/glthread.c -+++ b/src/mesa/main/glthread.c -@@ -214,19 +214,20 @@ _mesa_glthread_flush_batch(struct gl_context *ctx) - /* Pin threads regularly to the same Zen CCX that the main thread is - * running on. The main thread can move between CCXs. - */ -- if (util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 && -+ if (util_get_cpu_caps()->nr_cpus != util_get_cpu_caps()->cores_per_L3 && - /* driver support */ - ctx->Driver.PinDriverToL3Cache && - ++glthread->pin_thread_counter % 128 == 0) { - int cpu = util_get_current_cpu(); - - if (cpu >= 0) { -- unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu]; -- -- util_set_thread_affinity(glthread->queue.threads[0], -- util_cpu_caps.L3_affinity_mask[L3_cache], -- NULL, util_cpu_caps.num_cpu_mask_bits); -- ctx->Driver.PinDriverToL3Cache(ctx, L3_cache); -+ uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu]; -+ if (L3_cache != U_CPU_INVALID_L3) { -+ util_set_thread_affinity(glthread->queue.threads[0], -+ util_get_cpu_caps()->L3_affinity_mask[L3_cache], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); -+ ctx->Driver.PinDriverToL3Cache(ctx, L3_cache); -+ } - } - } - -diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c -index d5b20e1955b..204c00a057e 100644 ---- a/src/mesa/state_tracker/st_context.c -+++ b/src/mesa/state_tracker/st_context.c -@@ -821,7 +821,7 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe, - !st->lower_ucp; - st->shader_has_one_variant[MESA_SHADER_COMPUTE] = st->has_shareable_shaders; - -- if (util_cpu_caps.cores_per_L3 == util_cpu_caps.nr_cpus || -+ if (util_get_cpu_caps()->cores_per_L3 == util_get_cpu_caps()->nr_cpus || - !st->pipe->set_context_param) - st->pin_thread_counter = ST_L3_PINNING_DISABLED; - -diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c -index c11934c0a39..482a7cdf745 100644 ---- a/src/mesa/state_tracker/st_draw.c -+++ b/src/mesa/state_tracker/st_draw.c -@@ -116,11 +116,13 @@ prepare_draw(struct st_context *st, struct gl_context *ctx) - int cpu = util_get_current_cpu(); - if (cpu >= 0) { - struct pipe_context *pipe = st->pipe; -- unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu]; -+ uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu]; - -- pipe->set_context_param(pipe, -- PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, -- L3_cache); -+ if (L3_cache != U_CPU_INVALID_L3) { -+ pipe->set_context_param(pipe, -+ PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, -+ L3_cache); -+ } - } - } - } -diff --git a/src/util/half_float.h b/src/util/half_float.h -index 4e15b2bdb0b..91d4ebd41f0 100644 ---- a/src/util/half_float.h -+++ b/src/util/half_float.h -@@ -59,7 +59,7 @@ static inline uint16_t - _mesa_float_to_half(float val) - { - #if defined(USE_X86_64_ASM) -- if (util_cpu_caps.has_f16c) { -+ if (util_get_cpu_caps()->has_f16c) { - __m128 in = {val}; - __m128i out; - -@@ -75,7 +75,7 @@ static inline float - _mesa_half_to_float(uint16_t val) - { - #if defined(USE_X86_64_ASM) -- if (util_cpu_caps.has_f16c) { -+ if (util_get_cpu_caps()->has_f16c) { - __m128i in = {val}; - __m128 out; - -@@ -90,7 +90,7 @@ static inline uint16_t - _mesa_float_to_float16_rtz(float val) - { - #if defined(USE_X86_64_ASM) -- if (util_cpu_caps.has_f16c) { -+ if (util_get_cpu_caps()->has_f16c) { - __m128 in = {val}; - __m128i out; - -diff --git a/src/util/tests/format/u_format_test.c b/src/util/tests/format/u_format_test.c -index f4a62a5c6a8..e6473c2bf6d 100644 ---- a/src/util/tests/format/u_format_test.c -+++ b/src/util/tests/format/u_format_test.c -@@ -850,6 +850,8 @@ int main(int argc, char **argv) - { - boolean success; - -+ util_cpu_detect(); -+ - success = test_all(); - - return success ? 0 : 1; -diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c -index 8cfe3286b1f..4a4b06e1bc6 100644 ---- a/src/util/u_cpu_detect.c -+++ b/src/util/u_cpu_detect.c -@@ -90,7 +90,7 @@ - DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false) - - --struct util_cpu_caps util_cpu_caps; -+struct util_cpu_caps_t util_cpu_caps; - - #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - static int has_cpuid(void); -@@ -438,26 +438,22 @@ get_cpu_topology(void) - util_cpu_caps.cores_per_L3 = util_cpu_caps.nr_cpus; - util_cpu_caps.num_L3_caches = 1; - -+ memset(util_cpu_caps.cpu_to_L3, 0xff, sizeof(util_cpu_caps.cpu_to_L3)); -+ - #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - /* AMD Zen */ - if (util_cpu_caps.family >= CPU_AMD_ZEN1_ZEN2 && - util_cpu_caps.family < CPU_AMD_LAST) { - uint32_t regs[4]; - -- /* Query the L3 cache count. */ -- cpuid_count(0x8000001D, 3, regs); -- unsigned cache_level = (regs[0] >> 5) & 0x7; -- unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1; -- -- if (cache_level != 3 || cores_per_L3 == util_cpu_caps.nr_cpus) -- return; -- - uint32_t saved_mask[UTIL_MAX_CPUS / 32] = {0}; - uint32_t mask[UTIL_MAX_CPUS / 32] = {0}; -- uint32_t allowed_mask[UTIL_MAX_CPUS / 32] = {0}; -- uint32_t apic_id[UTIL_MAX_CPUS]; - bool saved = false; - -+ uint32_t L3_found[UTIL_MAX_CPUS] = {0}; -+ uint32_t num_L3_caches = 0; -+ util_affinity_mask *L3_affinity_masks = NULL; -+ - /* Query APIC IDs from each CPU core. - * - * An APIC ID is a logical ID of the CPU with respect to the cache -@@ -484,39 +480,58 @@ get_cpu_topology(void) - !saved ? saved_mask : NULL, - util_cpu_caps.num_cpu_mask_bits)) { - saved = true; -- allowed_mask[i / 32] |= cpu_bit; - - /* Query the APIC ID of the current core. */ - cpuid(0x00000001, regs); -- apic_id[i] = regs[1] >> 24; -+ unsigned apic_id = regs[1] >> 24; -+ -+ /* Query the total core count for the CPU */ -+ uint32_t core_count = 1; -+ if (regs[3] & (1 << 28)) -+ core_count = (regs[1] >> 16) & 0xff; -+ -+ core_count = util_next_power_of_two(core_count); -+ -+ /* Query the L3 cache count. */ -+ cpuid_count(0x8000001D, 3, regs); -+ unsigned cache_level = (regs[0] >> 5) & 0x7; -+ unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1; -+ -+ if (cache_level != 3) -+ continue; -+ -+ unsigned local_core_id = apic_id & (core_count - 1); -+ unsigned phys_id = (apic_id & ~(core_count - 1)) >> util_logbase2(core_count); -+ unsigned local_l3_cache_index = local_core_id / util_next_power_of_two(cores_per_L3); -+#define L3_ID(p, i) (p << 16 | i << 1 | 1); -+ -+ unsigned l3_id = L3_ID(phys_id, local_l3_cache_index); -+ int idx = -1; -+ for (unsigned c = 0; c < num_L3_caches; c++) { -+ if (L3_found[c] == l3_id) { -+ idx = c; -+ break; -+ } -+ } -+ if (idx == -1) { -+ idx = num_L3_caches; -+ L3_found[num_L3_caches++] = l3_id; -+ L3_affinity_masks = realloc(L3_affinity_masks, sizeof(util_affinity_mask) * num_L3_caches); -+ if (!L3_affinity_masks) -+ return; -+ memset(&L3_affinity_masks[num_L3_caches - 1], 0, sizeof(util_affinity_mask)); -+ } -+ util_cpu_caps.cpu_to_L3[i] = idx; -+ L3_affinity_masks[idx][i / 32] |= cpu_bit; -+ - } - mask[i / 32] = 0; - } - -- if (saved) { -- -- /* We succeeded in using at least one CPU. */ -- util_cpu_caps.num_L3_caches = util_cpu_caps.nr_cpus / cores_per_L3; -- util_cpu_caps.cores_per_L3 = cores_per_L3; -- util_cpu_caps.L3_affinity_mask = calloc(sizeof(util_affinity_mask), -- util_cpu_caps.num_L3_caches); -- -- for (unsigned i = 0; i < util_cpu_caps.nr_cpus && i < UTIL_MAX_CPUS; -- i++) { -- uint32_t cpu_bit = 1u << (i % 32); -- -- if (allowed_mask[i / 32] & cpu_bit) { -- /* Each APIC ID bit represents a topology level, so we need -- * to round up to the next power of two. -- */ -- unsigned L3_index = apic_id[i] / -- util_next_power_of_two(cores_per_L3); -- -- util_cpu_caps.L3_affinity_mask[L3_index][i / 32] |= cpu_bit; -- util_cpu_caps.cpu_to_L3[i] = L3_index; -- } -- } -+ util_cpu_caps.num_L3_caches = num_L3_caches; -+ util_cpu_caps.L3_affinity_mask = L3_affinity_masks; - -+ if (saved) { - if (debug_get_option_dump_cpu()) { - fprintf(stderr, "CPU <-> L3 cache mapping:\n"); - for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) { -@@ -548,7 +563,7 @@ util_cpu_detect_once(void) - { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); -- util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors; -+ util_cpu_caps.nr_cpus = MAX2(1, system_info.dwNumberOfProcessors); - } - #elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN) - util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h -index 5a9a139c990..1c7239b2ec7 100644 ---- a/src/util/u_cpu_detect.h -+++ b/src/util/u_cpu_detect.h -@@ -55,7 +55,7 @@ enum cpu_family { - - typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32]; - --struct util_cpu_caps { -+struct util_cpu_caps_t { - int nr_cpus; - enum cpu_family family; - -@@ -105,8 +105,20 @@ struct util_cpu_caps { - util_affinity_mask *L3_affinity_mask; - }; - --extern struct util_cpu_caps --util_cpu_caps; -+#define U_CPU_INVALID_L3 0xffff -+ -+static inline const struct util_cpu_caps_t * -+util_get_cpu_caps(void) -+{ -+ extern struct util_cpu_caps_t util_cpu_caps; -+ -+ /* If you hit this assert, it means that something is using the -+ * cpu-caps without having first called util_cpu_detect() -+ */ -+ assert(util_cpu_caps.nr_cpus >= 1); -+ -+ return &util_cpu_caps; -+} - - void util_cpu_detect(void); - -diff --git a/src/util/u_math.c b/src/util/u_math.c -index 9a8a9ecbbde..41e7f599eb0 100644 ---- a/src/util/u_math.c -+++ b/src/util/u_math.c -@@ -92,7 +92,7 @@ util_fpstate_get(void) - unsigned mxcsr = 0; - - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - mxcsr = _mm_getcsr(); - } - #endif -@@ -110,10 +110,10 @@ unsigned - util_fpstate_set_denorms_to_zero(unsigned current_mxcsr) - { - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - /* Enable flush to zero mode */ - current_mxcsr |= _MM_FLUSH_ZERO_MASK; -- if (util_cpu_caps.has_daz) { -+ if (util_get_cpu_caps()->has_daz) { - /* Enable denormals are zero mode */ - current_mxcsr |= _MM_DENORMALS_ZERO_MASK; - } -@@ -132,7 +132,7 @@ void - util_fpstate_set(unsigned mxcsr) - { - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - _mm_setcsr(mxcsr); - } - #endif -diff --git a/src/util/u_queue.c b/src/util/u_queue.c -index b456871290d..489a8a14be4 100644 ---- a/src/util/u_queue.c -+++ b/src/util/u_queue.c -@@ -267,7 +267,7 @@ util_queue_thread_func(void *input) - util_cpu_detect(); - - util_set_current_thread_affinity(mask, NULL, -- util_cpu_caps.num_cpu_mask_bits); -+ util_get_cpu_caps()->num_cpu_mask_bits); - } - - #if defined(__linux__) diff --git a/mesa.spec b/mesa.spec index d25c86c..5f4d000 100644 --- a/mesa.spec +++ b/mesa.spec @@ -56,9 +56,9 @@ Name: mesa Summary: Mesa graphics libraries -%global ver 21.0.1 +%global ver 21.0.2 Version: %{lua:ver = string.gsub(rpm.expand("%{ver}"), "-", "~"); print(ver)} -Release: 4%{?dist} +Release: 1%{?dist} License: MIT URL: http://www.mesa3d.org @@ -71,10 +71,6 @@ Source1: Mesa-MLAA-License-Clarification-Email.txt # https://gitlab.freedesktop.org/mesa/mesa/-/issues/4442 Patch0: mesa-llvm12.patch -Patch1: 0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch -# fix AMD EPYC 2-socket machines -Patch2: cpu_caps_fixes.patch - BuildRequires: meson >= 0.45 BuildRequires: gcc BuildRequires: gcc-c++ @@ -616,6 +612,9 @@ popd %endif %changelog +* Wed Apr 07 2021 Pete Walter - 21.0.2-1 +- Update to 21.0.2 + * Thu Apr 01 2021 Dave Airlie - 21.0.1-4 - Backport CPU caps fixes diff --git a/sources b/sources index a80ab6f..1e0226f 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (mesa-21.0.1.tar.xz) = b31b78778b6092dfaf0712f90de3074217574389c4236f8379c127739874f6bd1b47883140a26445d25e58df87e6207278efd048453096ee710d334b1dcfe419 +SHA512 (mesa-21.0.2.tar.xz) = c3d7969b56e1c31ee642e3b7143d565c4233173dab7cc5576b686c873c27134dc8292a9f2caa0a0dd3c54d0c89d27d6030f36a2c84f85dcedee7ae80b19e5c3b