Import Mesa 20.1.7

This commit is contained in:
jsg 2020-09-03 06:40:00 +00:00
parent 76a0a851ab
commit 7fa385c847
34 changed files with 5213 additions and 133 deletions

File diff suppressed because it is too large


@ -36,7 +36,7 @@ depends on the particular driver being used.
<h2>SHA256 checksum</h2>
<pre>
TBD.
23bed40114b03ad640c95bfe72cc879ed2f941d0d481b77b5204a1fc567fa93c mesa-20.1.6.tar.xz
</pre>


@ -0,0 +1,169 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 20.1.7 Release Notes / 2020-09-02</h1>
<p>
Mesa 20.1.7 is a bug fix release which fixes bugs found since the 20.1.6 release.
</p>
<p>
Mesa 20.1.7 implements the OpenGL 4.6 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.6. OpenGL
4.6 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
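Because the reported version varies by driver, applications typically parse the version string at run time. A hedged sketch: parse_gl_version() is a hypothetical helper, not a Mesa or GL API, and it only assumes that GL_VERSION begins with "major.minor", as in "4.6 (Core Profile) Mesa 20.1.7".

```c
/* Hypothetical helper: pull the context version out of a GL_VERSION string.
 * Illustrative only; assumes the string begins "major.minor". */
#include <stdio.h>

static int parse_gl_version(const char *version, int *major, int *minor)
{
    /* e.g. "4.6 (Core Profile) Mesa 20.1.7" -> 4, 6 */
    return sscanf(version, "%d.%d", major, minor) == 2;
}
```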
<p>
Mesa 20.1.7 implements the Vulkan 1.2 API, but the version reported by
the apiVersion property of the VkPhysicalDeviceProperties struct
depends on the particular driver being used.
</p>
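The apiVersion field packs major, minor, and patch into one 32-bit value. A sketch of decoding it, using local macros that mirror VK_MAKE_VERSION / VK_VERSION_MAJOR / VK_VERSION_MINOR / VK_VERSION_PATCH from vulkan_core.h (the names below are stand-ins, not the Vulkan API itself; the 10-bit minor / 12-bit patch layout is the Vulkan 1.x convention):

```c
/* Stand-in macros mirroring the vulkan_core.h version encoding:
 * bits 31..22 = major, 21..12 = minor, 11..0 = patch. */
#include <stdint.h>

#define MAKE_API_VERSION(maj, min, pat) \
    ((uint32_t)(((maj) << 22) | ((min) << 12) | (pat)))
#define API_VERSION_MAJOR(v) ((uint32_t)(v) >> 22)
#define API_VERSION_MINOR(v) (((uint32_t)(v) >> 12) & 0x3ffu)
#define API_VERSION_PATCH(v) ((uint32_t)(v) & 0xfffu)
```

A driver whose apiVersion encodes 1.2.x reports major 1, minor 2; a Vulkan 1.1-only driver would still report minor 1 even under Mesa 20.1.7.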
<h2>SHA256 checksum</h2>
<pre>
TBD.
</pre>
<h2>New features</h2>
<ul>
<li>None</li>
</ul>
<h2>Bug fixes</h2>
<ul>
<li>Road Redemption certain graphic effects rendered white color</li>
<li>Intel Vulkan driver crash with alpha-to-coverage</li>
<li>error: static_assert was not declared in this scope</li>
<li>vulkan/wsi/x11: deadlock with Xwayland when compositor holds multiple buffers</li>
<li>[RADV/ACO] Death Stranding cause a GPU hung (*ERROR* Waiting for fences timed out!)</li>
<li>lp_bld_init.c:172:7: error: implicit declaration of function LLVMAddConstantPropagationPass; did you mean LLVMAddCorrelatedValuePropagationPass? [-Werror=implicit-function-declaration]</li>
<li>radv: blitting 3D images with linear filter</li>
<li>&lt;&lt;MESA crashed&gt;&gt; Array Index Out of Range with Graphicsfuzz application</li>
<li>Intel Vulkan driver assertion with small xfb buffer</li>
</ul>
<h2>Changes</h2>
<ul>
<p>Alejandro Piñeiro (2):</p>
<li> v3d/packet: fix typo on Set InstanceID/PrimitiveID packet</li>
<li> v3d: set instance id to 0 at start of tile</li>
<p></p>
<p>Alyssa Rosenzweig (6):</p>
<li> panfrost: Fix blend leak for render targets 5-8</li>
<li> panfrost: Free hash_to_temp map</li>
<li> pan/mdg: Free previous liveness</li>
<li> panfrost: Use memctx for sysvals</li>
<li> panfrost: Free batch-&gt;dependencies</li>
<li> pan/mdg: Fix perspective combination</li>
<p></p>
<p>Bas Nieuwenhuizen (1):</p>
<li> radv: Fix 3d blits.</li>
<p></p>
<p>Danylo Piliaiev (3):</p>
<li> glsl: Eliminate out-of-bounds triop_vector_insert</li>
<li> ir_constant: Return zero on out-of-bounds vector accesses</li>
<li> glsl: Eliminate assignments to out-of-bounds elements of vector</li>
<p></p>
<p>Emil Velikov (1):</p>
<li> radv: restrict exported symbols with static llvm</li>
<p></p>
<p>Eric Engestrom (10):</p>
<li> docs/relnotes: add sha256 sums to 20.1.6</li>
<li> .pick_status.json: Update to e94c22429b64f419d9a66f04fa5ecdad33f7f5ef</li>
<li> .pick_status.json: Mark 9146f596ed1e8854a2a6c9137396a902bc92946c as denominated</li>
<li> .pick_status.json: Mark da6d0e3facfe0eb5c7db2d75d6992643d929caff as denominated</li>
<li> .pick_status.json: Mark b5558f2d2aa738d90b9e039144ae3ca69bdf92ca as denominated</li>
<li> .pick_status.json: Mark c9858fb941ce7e903f608e537b3657c946f86980 as denominated</li>
<li> .pick_status.json: Mark ee77951714ff4373261befde6e84f592cc1c769c as denominated</li>
<li> .pick_status.json: Mark 7c226116c6c0793d6d9a7dec52ac7cf54b82b57f as denominated</li>
<li> .pick_status.json: Mark d7d7687829875e401690219d4a72458fb2bbe4de as denominated</li>
<li> scons: bump c++ standard to 14 to match meson</li>
<p></p>
<p>Jason Ekstrand (5):</p>
<li> clover/spirv: Don&#x27;t call llvm::regularizeLlvmForSpirv</li>
<li> intel/nir: Pass the nir_builder by reference in lower_alpha_to_coverage</li>
<li> nir: Add a nir_metadata_all enum value</li>
<li> intel/nir: Rewrite the guts of lower_alpha_to_coverage</li>
<li> intel/fs: Fix MOV_INDIRECT and BROADCAST of Q types on Gen11+</li>
<p></p>
<p>Jonathan Gray (11):</p>
<li> util: unbreak endian detection on OpenBSD</li>
<li> util/anon_file: add OpenBSD shm_mkstemp() path</li>
<li> meson: build with _ISOC11_SOURCE on OpenBSD</li>
<li> meson: conditionally include -ldl in gbm pkg-config file</li>
<li> util: futex fixes for OpenBSD</li>
<li> util/u_thread: include pthread_np.h if found</li>
<li> anv: use os_get_total_physical_memory()</li>
<li> util/os_misc: add os_get_available_system_memory()</li>
<li> anv: use os_get_available_system_memory()</li>
<li> util/os_misc: os_get_available_system_memory() for OpenBSD</li>
<li> vulkan: make VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT conditional</li>
<p></p>
<p>Lionel Landwerlin (3):</p>
<li> anv: fix transform feedback surface size</li>
<li> intel/perf: store query symbol name</li>
<li> intel/perf: fix raw query kernel metric selection</li>
<p></p>
<p>Marek Olšák (3):</p>
<li> st/mesa: don&#x27;t generate NIR for ARB_vp/fp if NIR is not preferred</li>
<li> radeonsi: fix tess levels coming as scalar arrays from SPIR-V</li>
<li> gallivm: fix build on LLVM 12 due to LLVMAddConstantPropagationPass removal</li>
<p></p>
<p>Marek Vasut (2):</p>
<li> etnaviv: Remove etna_resource_get_status()</li>
<li> etnaviv: Add lock around pending_ctx</li>
<p></p>
<p>Nanley Chery (1):</p>
<li> gallium/dri2: Report correct YUYV and UYVY plane count</li>
<p></p>
<p>Pierre Moreau (1):</p>
<li> clover/spirv: Remove unused tuple header</li>
<p></p>
<p>Pierre-Eric Pelloux-Prayer (5):</p>
<li> mesa/st: introduce PIPE_CAP_NO_CLIP_ON_COPY_TEX</li>
<li> radeonsi: enable PIPE_CAP_NO_CLIP_ON_COPY_TEX</li>
<li> ac/llvm: add option to clamp division by zero</li>
<li> radeonsi,driconf: add clamp_div_by_zero option</li>
<li> radeonsi: use radeonsi_clamp_div_by_zero for SPECviewperf13, Road Redemption</li>
<p></p>
<p>Rhys Perry (1):</p>
<li> aco: fix non-rtz pack_half_2x16</li>
<p></p>
<p>Rob Clark (1):</p>
<li> freedreno: handle case of shadowing current render target</li>
<p></p>
<p>Roman Gilg (2):</p>
<li> vulkan/wsi/x11: add sent image counter</li>
<li> vulkan/wsi/x11: wait for acquirable images in FIFO mode</li>
<p></p>
<p>Samuel Pitoiset (1):</p>
<li> nir/algebraic: mark some optimizations with fsat(NaN) as inexact</li>
<p></p>
<p>Vinson Lee (1):</p>
<li> vulkan: Fix memory leaks.</li>
<p></p>
</ul>
</div>
</body>
</html>


@ -701,6 +701,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
ac_to_float_type(&ctx->ac, def_type), src[0]);
}
if (ctx->abi->clamp_div_by_zero)
result = ac_build_fmin(&ctx->ac, result,
LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
break;
case nir_op_iand:
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
@ -847,6 +850,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
case nir_op_frsq:
result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
ac_to_float_type(&ctx->ac, def_type), src[0]);
if (ctx->abi->clamp_div_by_zero)
result = ac_build_fmin(&ctx->ac, result,
LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(&ctx->ac, src[0]);


@ -186,6 +186,9 @@ struct ac_shader_abi {
/* Whether bounds checks are required */
bool robust_buffer_access;
/* Clamp div by 0 (so it won't produce NaN) */
bool clamp_div_by_zero;
};
#endif /* AC_SHADER_ABI_H */


@ -157,6 +157,16 @@ if with_platform_android
]
endif
# When static linking LLVM, all its symbols are public API.
# That may cause symbol collision, so explicitly demote everything.
libvulkan_radeon_ld_args = []
libvulkan_radeon_link_depends = []
if with_llvm and with_ld_version_script
libvulkan_radeon_ld_args += ['-Wl,--version-script', join_paths(meson.current_source_dir(), 'vulkan.sym')]
libvulkan_radeon_link_depends += files('vulkan.sym')
endif
libvulkan_radeon = shared_library(
'vulkan_radeon',
[libradv_files, radv_entrypoints, radv_extensions_c, amd_vk_format_table_c, sha1_h, radv_gfx10_format_table_h],
@ -173,7 +183,8 @@ libvulkan_radeon = shared_library(
],
c_args : [c_vis_args, no_override_init_args, radv_flags],
cpp_args : [cpp_vis_args, radv_flags],
link_args : [ld_args_bsymbolic, ld_args_gc_sections],
link_args : [ld_args_bsymbolic, ld_args_gc_sections, libvulkan_radeon_ld_args],
link_depends : [libvulkan_radeon_link_depends,],
install : true,
)


@ -0,0 +1,11 @@
{
global:
vk_icdGetInstanceProcAddr;
vk_icdGetPhysicalDeviceProcAddr;
vk_icdNegotiateLoaderICDInterfaceVersion;
local:
# When static linking LLVM, all its symbols are public API.
# That may cause symbol collision, so explicitly demote everything.
*;
};


@ -630,11 +630,11 @@
</packet>
<packet code="54" name="Set InstanceID" cl="B" min_ver="41">
<field name="Instance ID" size="32" start="32" type="uint"/>
<field name="Instance ID" size="32" start="0" type="uint"/>
</packet>
<packet code="55" name="Set PrimitiveID" cl="B" min_ver="41">
<field name="Primitive ID" size="32" start="32" type="uint"/>
<field name="Primitive ID" size="32" start="0" type="uint"/>
</packet>
<packet code="56" name="Prim List Format">


@ -136,16 +136,32 @@ vector_deref_visitor::visit_enter(ir_assignment *ir)
ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
ir->set_lhs(new_lhs);
}
} else if (new_lhs->ir_type != ir_type_swizzle) {
} else {
unsigned index = old_index_constant->get_uint_component(0);
if (index >= new_lhs->type->vector_elements) {
/* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
*
* In the subsections described above for array, vector, matrix and
* structure accesses, any out-of-bounds access produced undefined
* behavior.... Out-of-bounds writes may be discarded or overwrite
* other variables of the active program.
*/
ir->remove();
return visit_continue;
}
if (new_lhs->ir_type != ir_type_swizzle) {
ir->set_lhs(new_lhs);
ir->write_mask = 1 << old_index_constant->get_uint_component(0);
ir->write_mask = 1 << index;
} else {
/* If the "new" LHS is a swizzle, use the set_lhs helper to instead
* swizzle the RHS.
*/
unsigned component[1] = { old_index_constant->get_uint_component(0) };
unsigned component[1] = { index };
ir->set_lhs(new(mem_ctx) ir_swizzle(new_lhs, component, 1));
}
}
return ir_rvalue_enter_visitor::visit_enter(ir);
}


@ -32,7 +32,8 @@ namespace {
class vector_insert_visitor : public ir_rvalue_visitor {
public:
vector_insert_visitor(bool lower_nonconstant_index)
: progress(false), lower_nonconstant_index(lower_nonconstant_index)
: progress(false), lower_nonconstant_index(lower_nonconstant_index),
remove_assignment(false)
{
factory.instructions = &factory_instructions;
}
@ -43,11 +44,13 @@ public:
}
virtual void handle_rvalue(ir_rvalue **rv);
virtual ir_visitor_status visit_leave(ir_assignment *expr);
ir_factory factory;
exec_list factory_instructions;
bool progress;
bool lower_nonconstant_index;
bool remove_assignment;
};
} /* anonymous namespace */
@ -68,6 +71,21 @@ vector_insert_visitor::handle_rvalue(ir_rvalue **rv)
ir_constant *const idx =
expr->operands[2]->constant_expression_value(factory.mem_ctx);
if (idx != NULL) {
unsigned index = idx->value.u[0];
if (index >= expr->operands[0]->type->vector_elements) {
/* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
*
* In the subsections described above for array, vector, matrix and
* structure accesses, any out-of-bounds access produced undefined
* behavior.... Out-of-bounds writes may be discarded or overwrite
* other variables of the active program.
*/
this->remove_assignment = true;
this->progress = true;
return;
}
/* Replace (vector_insert (vec) (scalar) (index)) with a dereference of
* a new temporary. The new temporary gets assigned as
*
@ -136,6 +154,19 @@ vector_insert_visitor::handle_rvalue(ir_rvalue **rv)
base_ir->insert_before(factory.instructions);
}
ir_visitor_status
vector_insert_visitor::visit_leave(ir_assignment *ir)
{
ir_rvalue_visitor::visit_leave(ir);
if (this->remove_assignment) {
ir->remove();
this->remove_assignment = false;
}
return visit_continue;
}
bool
lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index)
{


@ -433,6 +433,9 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen,
case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL:
return 0;
case PIPE_CAP_NO_CLIP_ON_COPY_TEX:
return 0;
default:
unreachable("bad PIPE_CAP_*");
}


@ -166,7 +166,7 @@ panfrost_delete_blend_state(struct pipe_context *pipe,
{
struct panfrost_blend_state *blend = (struct panfrost_blend_state *) cso;
for (unsigned c = 0; c < 4; ++c) {
for (unsigned c = 0; c < PIPE_MAX_COLOR_BUFS; ++c) {
struct panfrost_blend_rt *rt = &blend->rt[c];
_mesa_hash_table_u64_clear(rt->shaders, panfrost_delete_blend_shader);
}


@ -178,6 +178,8 @@ panfrost_free_batch(struct panfrost_batch *batch)
panfrost_batch_fence_unreference(*dep);
}
util_dynarray_fini(&batch->dependencies);
/* The out_sync fence lifetime is different from the batch one
* since other batches might want to wait on a fence of already
* submitted/signaled batch. All we need to do here is make sure the


@ -7,5 +7,6 @@ OPT_BOOL(halt_shaders, false, "Halt shaders at the start (will hang)")
OPT_BOOL(vs_fetch_always_opencode, false,
"Always open code vertex fetches (less efficient, purely for testing)")
OPT_BOOL(prim_restart_tri_strips_only, false, "Only enable primitive restart for triangle strips")
OPT_BOOL(clamp_div_by_zero, false, "Clamp div by zero (x / 0 becomes FLT_MAX instead of NaN)")
#undef OPT_BOOL


@ -162,6 +162,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DRAW_INFO_START_WITH_USER_INDICES:
case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL:
case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
case PIPE_CAP_NO_CLIP_ON_COPY_TEX:
return 1;
case PIPE_CAP_QUERY_SO_OVERFLOW:


@ -452,6 +452,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
ctx->abi.inputs = &ctx->inputs[0];
ctx->abi.clamp_shadow_reference = true;
ctx->abi.robust_buffer_access = true;
ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero;
if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) {
assert(gl_shader_stage_is_compute(nir->info.stage));


@ -513,7 +513,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
const unsigned component = var->data.location_frac;
unsigned component = var->data.location_frac;
unsigned driver_location = var->data.driver_location;
LLVMValueRef dw_addr, stride;
LLVMValueRef buffer, base, addr;
@ -521,6 +521,12 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_
bool skip_lds_store;
bool is_tess_factor = false, is_tess_inner = false;
if (var->data.compact) {
component += const_index;
writemask <<= const_index;
const_index = 0;
}
driver_location = driver_location / 4;
ubyte name = info->output_semantic_name[driver_location];
ubyte index = info->output_semantic_index[driver_location];


@ -440,6 +440,13 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
fmt.primitive_type = LIST_TRIANGLES;
}
#if V3D_VERSION >= 41
/* PTB assumes this value is 0, but the hw will not set it. */
cl_emit(cl, SET_INSTANCEID, set) {
set.instance_id = 0;
}
#endif
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
v3d_rcl_emit_stores(job, cl, layer);


@ -22,7 +22,6 @@
#include "invocation.hpp"
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>


@ -56,114 +56,134 @@
* 1.0000 1111111111111111
*/
static nir_ssa_def *
build_dither_mask(nir_builder b, nir_intrinsic_instr *store_instr)
build_dither_mask(nir_builder *b, nir_ssa_def *color)
{
nir_ssa_def *alpha =
nir_channel(&b, nir_ssa_for_src(&b, store_instr->src[0], 4), 3);
assert(color->num_components == 4);
nir_ssa_def *alpha = nir_channel(b, color, 3);
nir_ssa_def *m =
nir_f2i32(&b, nir_fmul_imm(&b, nir_fsat(&b, alpha), 16.0));
nir_f2i32(b, nir_fmul_imm(b, nir_fsat(b, alpha), 16.0));
nir_ssa_def *part_a =
nir_iand(&b,
nir_imm_int(&b, 0xf),
nir_ushr(&b,
nir_imm_int(&b, 0xfea80),
nir_iand(&b, m, nir_imm_int(&b, ~3))));
nir_iand(b,
nir_imm_int(b, 0xf),
nir_ushr(b,
nir_imm_int(b, 0xfea80),
nir_iand(b, m, nir_imm_int(b, ~3))));
nir_ssa_def *part_b = nir_iand(&b, m, nir_imm_int(&b, 2));
nir_ssa_def *part_b = nir_iand(b, m, nir_imm_int(b, 2));
nir_ssa_def *part_c = nir_iand(&b, m, nir_imm_int(&b, 1));
nir_ssa_def *part_c = nir_iand(b, m, nir_imm_int(b, 1));
return nir_ior(&b,
nir_imul_imm(&b, part_a, 0x1111),
nir_ior(&b,
nir_imul_imm(&b, part_b, 0x0808),
nir_imul_imm(&b, part_c, 0x0100)));
return nir_ior(b,
nir_imul_imm(b, part_a, 0x1111),
nir_ior(b,
nir_imul_imm(b, part_b, 0x0808),
nir_imul_imm(b, part_c, 0x0100)));
}
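The pattern from the comment above can be reproduced scalar-style; a CPU sketch using the same constants as build_dither_mask(), for sanity-checking only (dither_mask() is not part of the driver):

```c
/* CPU reference for the 4x4 alpha-to-coverage dither mask:
 * alpha is quantized to 16 levels, then expanded into a 16-bit
 * coverage pattern via the 0xfea80 lookup and the low two bits. */
static unsigned dither_mask(float alpha)
{
    float sat = alpha < 0.0f ? 0.0f : (alpha > 1.0f ? 1.0f : alpha);
    int m = (int)(sat * 16.0f);
    unsigned part_a = 0xfu & (0xfea80u >> (m & ~3));
    unsigned part_b = (unsigned)(m & 2);
    unsigned part_c = (unsigned)(m & 1);
    return (part_a * 0x1111u) | (part_b * 0x0808u) | (part_c * 0x0100u);
}
```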
void
bool
brw_nir_lower_alpha_to_coverage(nir_shader *shader)
{
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
/* Bail out early if we don't have gl_SampleMask */
bool is_sample_mask = false;
nir_foreach_variable(var, &shader->outputs) {
if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
is_sample_mask = true;
break;
const uint64_t outputs_written = shader->info.outputs_written;
if (!(outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) ||
!(outputs_written & (BITFIELD64_BIT(FRAG_RESULT_COLOR) |
BITFIELD64_BIT(FRAG_RESULT_DATA0))))
goto skip;
nir_intrinsic_instr *sample_mask_write = NULL;
nir_intrinsic_instr *color0_write = NULL;
bool sample_mask_write_first = false;
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_store_output)
continue;
/* We call nir_lower_io_to_temporaries to lower FS outputs to
* temporaries with a copy at the end so this should be the last
* block in the shader.
*/
assert(block->cf_node.parent == &impl->cf_node);
assert(nir_cf_node_is_last(&block->cf_node));
/* See store_output in fs_visitor::nir_emit_fs_intrinsic */
const unsigned store_offset = nir_src_as_uint(intrin->src[1]);
const unsigned driver_location = nir_intrinsic_base(intrin) +
SET_FIELD(store_offset, BRW_NIR_FRAG_OUTPUT_LOCATION);
/* Extract the FRAG_RESULT */
const unsigned location =
GET_FIELD(driver_location, BRW_NIR_FRAG_OUTPUT_LOCATION);
if (location == FRAG_RESULT_SAMPLE_MASK) {
assert(sample_mask_write == NULL);
sample_mask_write = intrin;
sample_mask_write_first = (color0_write == NULL);
}
if (location == FRAG_RESULT_COLOR ||
location == FRAG_RESULT_DATA0) {
assert(color0_write == NULL);
color0_write = intrin;
}
}
}
if (!is_sample_mask)
return;
/* It's possible that shader_info may be out-of-date and the writes to
* either gl_SampleMask or the first color value may have been removed.
* This can happen if, for instance a nir_ssa_undef is written to the
* color value. In that case, just bail and don't do anything rather
* than crashing.
*/
if (color0_write == NULL || sample_mask_write == NULL)
goto skip;
/* It's possible that the color value isn't actually a vec4. In this case,
* assuming an alpha of 1.0 and letting the sample mask pass through
* unaltered seems like the kindest thing to do to apps.
*/
assert(color0_write->src[0].is_ssa);
nir_ssa_def *color0 = color0_write->src[0].ssa;
if (color0->num_components < 4)
goto skip;
assert(sample_mask_write->src[0].is_ssa);
nir_ssa_def *sample_mask = sample_mask_write->src[0].ssa;
if (sample_mask_write_first) {
/* If the sample mask write comes before the write to color0, we need
* to move it because it's going to use the value from color0 to
* compute the sample mask.
*/
nir_instr_remove(&sample_mask_write->instr);
nir_instr_insert(nir_after_instr(&color0_write->instr),
&sample_mask_write->instr);
}
nir_foreach_function(function, shader) {
nir_function_impl *impl = function->impl;
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
nir_intrinsic_instr *sample_mask_instr = NULL;
nir_intrinsic_instr *store_instr = NULL;
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_variable *out = NULL;
switch (intr->intrinsic) {
case nir_intrinsic_store_output:
nir_foreach_variable(var, &shader->outputs) {
int drvloc = var->data.driver_location;
if (nir_intrinsic_base(intr) == drvloc) {
out = var;
break;
}
}
if (out->data.mode != nir_var_shader_out)
continue;
/* save gl_SampleMask instruction pointer */
if (out->data.location == FRAG_RESULT_SAMPLE_MASK) {
assert(!sample_mask_instr);
sample_mask_instr = intr;
}
/* save out_color[0] instruction pointer */
if ((out->data.location == FRAG_RESULT_COLOR ||
out->data.location == FRAG_RESULT_DATA0)) {
nir_src *offset_src = nir_get_io_offset_src(intr);
if (nir_src_is_const(*offset_src) && nir_src_as_uint(*offset_src) == 0) {
assert(!store_instr);
store_instr = intr;
}
}
break;
default:
continue;
}
}
}
if (sample_mask_instr && store_instr) {
b.cursor = nir_before_instr(&store_instr->instr);
nir_ssa_def *dither_mask = build_dither_mask(b, store_instr);
/* Combine dither_mask and reorder gl_SampleMask store instruction
* after render target 0 store instruction.
*/
nir_instr_remove(&sample_mask_instr->instr);
dither_mask = nir_iand(&b, sample_mask_instr->src[0].ssa, dither_mask);
nir_instr_insert_after(&store_instr->instr, &sample_mask_instr->instr);
nir_instr_rewrite_src(&sample_mask_instr->instr,
&sample_mask_instr->src[0],
/* Combine dither_mask and the gl_SampleMask value */
b.cursor = nir_before_instr(&sample_mask_write->instr);
nir_ssa_def *dither_mask = build_dither_mask(&b, color0);
dither_mask = nir_iand(&b, sample_mask, dither_mask);
nir_instr_rewrite_src(&sample_mask_write->instr,
&sample_mask_write->src[0],
nir_src_for_ssa(dither_mask));
}
}
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
}
return true;
skip:
nir_metadata_preserve(impl, nir_metadata_all);
return false;
}


@ -621,6 +621,19 @@ load_oa_metrics(struct gen_perf_config *perf, int fd,
else
enumerate_sysfs_metrics(perf);
/* Select a fallback OA metric. Look for the TestOa metric, or use the last
* one if it is not present (on HSW).
*/
for (int i = 0; i < perf->n_queries; i++) {
if (perf->queries[i].symbol_name &&
strcmp(perf->queries[i].symbol_name, "TestOa") == 0) {
perf->fallback_raw_oa_metric = perf->queries[i].oa_metrics_set_id;
break;
}
}
if (perf->fallback_raw_oa_metric == 0)
perf->fallback_raw_oa_metric = perf->queries[perf->n_queries - 1].oa_metrics_set_id;
return true;
}


@ -170,6 +170,7 @@ struct gen_perf_query_info {
GEN_PERF_QUERY_TYPE_PIPELINE,
} kind;
const char *name;
const char *symbol_name;
const char *guid;
struct gen_perf_query_counter *counters;
int n_counters;
@ -227,6 +228,11 @@ struct gen_perf_config {
*/
struct hash_table *oa_metrics_table;
/* When MDAPI hasn't configured the metric we need to use by the time the
* query begins, this OA metric is used as a fallback.
*/
uint64_t fallback_raw_oa_metric;
/* Location of the device's sysfs entry. */
char sysfs_dev_dir[256];


@ -667,6 +667,7 @@ def main():
c(".kind = GEN_PERF_QUERY_TYPE_OA,\n")
c(".name = \"" + set.name + "\",\n")
c(".symbol_name = \"" + set.symbol_name + "\",\n")
c(".guid = \"" + set.hw_config_guid + "\",\n")
c(".counters = {0}_{1}_query_counters,".format(gen.chipset, set.underscore_name))


@ -423,7 +423,7 @@ get_metric_id(struct gen_perf_config *perf,
if (!gen_perf_load_metric_id(perf, query->guid,
&raw_query->oa_metrics_set_id)) {
DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid);
raw_query->oa_metrics_set_id = 1ULL;
raw_query->oa_metrics_set_id = perf->fallback_raw_oa_metric;
} else {
DBG("Raw query '%s' guid=%s loaded ID: %"PRIu64"\n",
query->name, query->guid, query->oa_metrics_set_id);


@ -1076,7 +1076,7 @@ bifrost_compile_shader_nir(nir_shader *nir, panfrost_program *program, unsigned
bi_optimize_nir(nir);
nir_print_shader(nir, stdout);
panfrost_nir_assign_sysvals(&ctx->sysvals, nir);
panfrost_nir_assign_sysvals(&ctx->sysvals, ctx, nir);
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
ctx->blend_types = program->blend_types;


@ -257,8 +257,6 @@ typedef struct compiler_context {
/* Constants which have been loaded, for later inlining */
struct hash_table_u64 *ssa_constants;
/* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
struct hash_table_u64 *hash_to_temp;
int temp_count;
int max_hash;


@ -2562,7 +2562,6 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
/* Initialize at a global (not block) level hash tables */
ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
@ -2598,7 +2597,7 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
/* Assign sysvals and counts, now that we're sure
* (post-optimisation) */
panfrost_nir_assign_sysvals(&ctx->sysvals, nir);
panfrost_nir_assign_sysvals(&ctx->sysvals, ctx, nir);
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);


@ -88,6 +88,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
}
if (!frcp_found) continue;
if (frcp_from != ins->src[0]) continue;
if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue;
if (!mir_single_use(ctx, frcp)) continue;


@ -30,13 +30,14 @@
* as such */
static unsigned
find_or_allocate_temp(compiler_context *ctx, unsigned hash)
find_or_allocate_temp(compiler_context *ctx, struct hash_table_u64 *map,
unsigned hash)
{
if (hash >= SSA_FIXED_MINIMUM)
return hash;
unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(
ctx->hash_to_temp, hash + 1);
map, hash + 1);
if (temp)
return temp - 1;
@ -45,7 +46,7 @@ find_or_allocate_temp(compiler_context *ctx, unsigned hash)
temp = ctx->temp_count++;
ctx->max_hash = MAX2(ctx->max_hash, hash);
_mesa_hash_table_u64_insert(ctx->hash_to_temp,
_mesa_hash_table_u64_insert(map,
hash + 1, (void *) ((uintptr_t) temp + 1));
return temp;
@ -57,10 +58,10 @@ find_or_allocate_temp(compiler_context *ctx, unsigned hash)
void
mir_squeeze_index(compiler_context *ctx)
{
struct hash_table_u64 *map = _mesa_hash_table_u64_create(NULL);
/* Reset */
ctx->temp_count = 0;
/* TODO don't leak old hash_to_temp */
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
/* We need to prioritize texture registers on older GPUs so we don't
* fail RA trying to assign to work registers r0/r1 when a work
@ -68,14 +69,16 @@ mir_squeeze_index(compiler_context *ctx)
mir_foreach_instr_global(ctx, ins) {
if (ins->type == TAG_TEXTURE_4)
ins->dest = find_or_allocate_temp(ctx, ins->dest);
ins->dest = find_or_allocate_temp(ctx, map, ins->dest);
}
mir_foreach_instr_global(ctx, ins) {
if (ins->type != TAG_TEXTURE_4)
ins->dest = find_or_allocate_temp(ctx, ins->dest);
ins->dest = find_or_allocate_temp(ctx, map, ins->dest);
for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i)
ins->src[i] = find_or_allocate_temp(ctx, ins->src[i]);
ins->src[i] = find_or_allocate_temp(ctx, map, ins->src[i]);
}
_mesa_hash_table_u64_destroy(map, NULL);
}


@ -77,7 +77,7 @@ struct panfrost_sysvals {
};
void
panfrost_nir_assign_sysvals(struct panfrost_sysvals *ctx, nir_shader *shader);
panfrost_nir_assign_sysvals(struct panfrost_sysvals *ctx, void *memctx, nir_shader *shader);
int
panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);


@ -128,11 +128,13 @@ pan_compute_liveness(
_mesa_hash_pointer,
_mesa_key_pointer_equal);
/* Allocate */
/* Free any previous liveness, and allocate */
pan_free_liveness(blocks);
list_for_each_entry(pan_block, block, blocks, link) {
block->live_in = rzalloc_array(NULL, uint16_t, temp_count);
block->live_out = rzalloc_array(NULL, uint16_t, temp_count);
block->live_in = rzalloc_array(block, uint16_t, temp_count);
block->live_out = rzalloc_array(block, uint16_t, temp_count);
}
/* Initialize the work list with the exit block */


@ -124,10 +124,10 @@ panfrost_nir_assign_sysval_body(struct panfrost_sysvals *ctx, nir_instr *instr)
}
void
panfrost_nir_assign_sysvals(struct panfrost_sysvals *ctx, nir_shader *shader)
panfrost_nir_assign_sysvals(struct panfrost_sysvals *ctx, void *memctx, nir_shader *shader)
{
ctx->sysval_count = 0;
ctx->sysval_to_id = _mesa_hash_table_u64_create(NULL);
ctx->sysval_to_id = _mesa_hash_table_u64_create(memctx);
nir_foreach_function(function, shader) {
if (!function->impl) continue;


@ -637,6 +637,12 @@ TODO: document the other workarounds.
<application name="Peace, Death!" executable="runner" sha1="5b909f3d21799773370adf084f649848f098234e">
<option name="radeonsi_sync_compile" value="true" />
</application>
<application name="SPECviewperf13" executable="viewperf">
<option name="radeonsi_clamp_div_by_zero" value="true" />
</application>
<application name="Road Redemption" executable="RoadRedemption.x86_64">
<option name="radeonsi_clamp_div_by_zero" value="true" />
</application>
</device>
<device driver="virtio_gpu">
<!-- Some Valve games do a final blit to a BGRA_sRGB surface. On a GLES


@ -137,14 +137,17 @@ static VkResult device_select_CreateInstance(const VkInstanceCreateInfo *pCreate
PFN_vkCreateInstance fpCreateInstance =
(PFN_vkCreateInstance)info->GetInstanceProcAddr(NULL, "vkCreateInstance");
if (fpCreateInstance == NULL) {
free(info);
return VK_ERROR_INITIALIZATION_FAILED;
}
chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance);
if (result != VK_SUCCESS)
if (result != VK_SUCCESS) {
free(info);
return result;
}
for (unsigned i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
if (!strcmp(pCreateInfo->ppEnabledExtensionNames[i], VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))