llvm/0001-SystemZ-TableGen-Fix-shift-count-handling.patch

From 2ac90db51fc323d183aabe744e57f4feca6d3008 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Wed, 1 Aug 2018 11:57:58 +0000
Subject: [PATCH] [SystemZ, TableGen] Fix shift count handling

*Backport of this patch from trunk without the TableGen fix and modified
to work with LLVM 6.0 TableGen.*

The DAG combiner logic to simplify AND masks in shift counts is invalid.
While it is true that the SystemZ shift instructions ignore all but the
low 6 bits of the shift count, it is still invalid to simplify the AND
masks while the DAG still uses the standard shift operators (which are
*not* defined to match the SystemZ instruction behavior).

Instead, this patch performs equivalent operations during instruction
selection. For completely removing the AND, this now happens via
additional DAG match patterns implemented by a multi-alternative
PatFrags. For simplifying a 32-bit AND to a 16-bit AND, the existing DAG
patterns were already mostly OK, they just needed an output XForm to
actually truncate the immediate value.

Unfortunately, the latter change also exposed a bug in TableGen: it
seems XForms are currently only handled correctly for direct operands of
the outermost operation node. This patch also fixes that bug by simply
recursing through the whole pattern. This should be NFC for all other
targets.

Differential Revision: https://reviews.llvm.org/D50096

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338521 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/SystemZ/SystemZISelLowering.cpp | 78 ------------------------------
 lib/Target/SystemZ/SystemZISelLowering.h   |  1 -
 lib/Target/SystemZ/SystemZInstrInfo.td     | 49 +++++++++++++------
 lib/Target/SystemZ/SystemZOperands.td      |  1 +
 lib/Target/SystemZ/SystemZOperators.td     |  6 +++
 test/CodeGen/SystemZ/shift-12.ll           | 12 +++++
 utils/TableGen/CodeGenDAGPatterns.cpp      | 39 ++++++++-------
 7 files changed, 71 insertions(+), 115 deletions(-)

diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index adf3683..505b143 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -522,10 +522,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
   setTargetDAGCombine(ISD::BSWAP);
-  setTargetDAGCombine(ISD::SHL);
-  setTargetDAGCombine(ISD::SRA);
-  setTargetDAGCombine(ISD::SRL);
-  setTargetDAGCombine(ISD::ROTL);

   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -5405,76 +5401,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
   return SDValue();
 }

-SDValue SystemZTargetLowering::combineSHIFTROT(
-    SDNode *N, DAGCombinerInfo &DCI) const {
-
-  SelectionDAG &DAG = DCI.DAG;
-
-  // Shift/rotate instructions only use the last 6 bits of the second operand
-  // register. If the second operand is the result of an AND with an immediate
-  // value that has its last 6 bits set, we can safely remove the AND operation.
-  //
-  // If the AND operation doesn't have the last 6 bits set, we can't remove it
-  // entirely, but we can still truncate it to a 16-bit value. This prevents
-  // us from ending up with a NILL with a signed operand, which will cause the
-  // instruction printer to abort.
-  SDValue N1 = N->getOperand(1);
-  if (N1.getOpcode() == ISD::AND) {
-    SDValue AndMaskOp = N1->getOperand(1);
-    auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
-
-    // The AND mask is constant
-    if (AndMask) {
-      auto AmtVal = AndMask->getZExtValue();
-
-      // Bottom 6 bits are set
-      if ((AmtVal & 0x3f) == 0x3f) {
-        SDValue AndOp = N1->getOperand(0);
-
-        // This is the only use, so remove the node
-        if (N1.hasOneUse()) {
-          // Combine the AND away
-          DCI.CombineTo(N1.getNode(), AndOp);
-
-          // Return N so it isn't rechecked
-          return SDValue(N, 0);
-
-        // The node will be reused, so create a new node for this one use
-        } else {
-          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
-                                        N->getValueType(0), N->getOperand(0),
-                                        AndOp);
-          DCI.AddToWorklist(Replace.getNode());
-
-          return Replace;
-        }
-
-      // We can't remove the AND, but we can use NILL here (normally we would
-      // use NILF). Only keep the last 16 bits of the mask. The actual
-      // transformation will be handled by .td definitions.
-      } else if (AmtVal >> 16 != 0) {
-        SDValue AndOp = N1->getOperand(0);
-
-        auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
-                                       SDLoc(AndMaskOp),
-                                       AndMaskOp.getValueType());
-
-        auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
-                                  AndOp, NewMask);
-
-        SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
-                                      N->getValueType(0), N->getOperand(0),
-                                      NewAnd);
-        DCI.AddToWorklist(Replace.getNode());
-
-        return Replace;
-      }
-    }
-  }
-
-  return SDValue();
-}
-
 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   switch(N->getOpcode()) {
@@ -5487,10 +5413,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
-  case ISD::SHL:
-  case ISD::SRA:
-  case ISD::SRL:
-  case ISD::ROTL:               return combineSHIFTROT(N, DCI);
   }

   return SDValue();
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 2cdc88d..1918d45 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -570,7 +570,6 @@ private:
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
-  SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;

   // If the last instruction before MBBI in MBB was some form of COMPARE,
   // try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index abb8045..fb40cb4 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1318,9 +1318,20 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
 // Shifts
 //===----------------------------------------------------------------------===//

+// Complexity is 8 so we match it before the NILL patterns below.
+let AddedComplexity = 8 in {
+
+class ShiftAndPat <SDNode node, Instruction inst, ValueType vt> : Pat <
+  (node vt:$val, (and i32:$count, imm32bottom6set)),
+  (inst vt:$val, i32:$count, 0)
+>;
+}
+
 // Logical shift left.
 defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+def : ShiftAndPat <shl, SLL, i32>;
 def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+def : ShiftAndPat <shl, SLLG, i64>;
 def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;

 // Arithmetic shift left.
@@ -1332,7 +1343,9 @@ let Defs = [CC] in {

 // Logical shift right.
 defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+def : ShiftAndPat <srl, SRL, i32>;
 def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+def : ShiftAndPat <srl, SRLG, i64>;
 def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;

 // Arithmetic shift right.
@@ -1341,10 +1354,14 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
   def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
   def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
 }
+def : ShiftAndPat <sra, SRA, i32>;
+def : ShiftAndPat <sra, SRAG, i64>;

 // Rotate left.
 def RLL  : BinaryRSY<"rll",  0xEB1D, rotl, GR32>;
+def : ShiftAndPat <rotl, RLL, i32>;
 def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+def : ShiftAndPat <rotl, RLLG, i64>;

 // Rotate second operand left and inserted selected bits into first operand.
 // These can act like 32-bit operands provided that the constant start and
@@ -2154,29 +2171,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
 // Complexity is added so that we match this before we match NILF on the AND
 // operation alone.
 let AddedComplexity = 4 in {
-  def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;

-  def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;

-  def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;

-  def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;

-  def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;

-  def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;

-  def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;

-  def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 }

 // Peepholes for turning scalar operations into block operations.
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 7136121..61a1124 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -341,6 +341,7 @@ def imm32zx16 : Immediate<i32, [{
 }], UIMM16, "U16Imm">;

 def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;

 // Full 32-bit immediates.  we need both signed and unsigned versions
 // because the assembler is picky.  E.g. AFI requires signed operands
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index d067f33..269c3d0 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -611,6 +611,12 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
   : PatFrag<(ops node:$addr),
             (store (operator), node:$addr)>;

+// Match a 32-bit immediate whose bottom 6 bits are all set, i.e. an AND
+// mask on a shift count that the shift patterns can safely ignore.
+def imm32bottom6set : PatLeaf<(i32 imm), [{
+  return (N->getZExtValue() & 0x3f) == 0x3f;
+}]>;
+
 // Vector representation of all-zeros and all-ones.
 def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
 def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
diff --git a/test/CodeGen/SystemZ/shift-12.ll b/test/CodeGen/SystemZ/shift-12.ll
index 4ebc42b..53d3d53 100644
--- a/test/CodeGen/SystemZ/shift-12.ll
+++ b/test/CodeGen/SystemZ/shift-12.ll
@@ -104,3 +104,15 @@ define i32 @f10(i32 %a, i32 %sh) {
   %reuse = add i32 %and, %shift
   ret i32 %reuse
 }
+
+; Test that AND is not removed for i128 (which calls __ashlti3)
+define i128 @f11(i128 %a, i32 %sh) {
+; CHECK-LABEL: f11:
+; CHECK: risbg %r4, %r4, 57, 191, 0
+; CHECK: brasl %r14, __ashlti3@PLT
+  %and = and i32 %sh, 127
+  %ext = zext i32 %and to i128
+  %shift = shl i128 %a, %ext
+  ret i128 %shift
+}
+
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 493066e..74af62b 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -3919,6 +3919,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
   return false;
 }

+// Promote xform function to be an explicit node wherever set.
+static TreePatternNode* PromoteXForms(TreePatternNode* N) {
+  if (Record *Xform = N->getTransformFn()) {
+      N->setTransformFn(nullptr);
+      std::vector<TreePatternNode*> Children;
+      Children.push_back(PromoteXForms(N));
+      return new TreePatternNode(Xform, std::move(Children),
+                                               N->getNumTypes());
+  }
+
+  if (!N->isLeaf())
+    for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
+      TreePatternNode* Child = N->getChild(i);
+      N->setChild(i, std::move(PromoteXForms(Child)));
+    }
+  return N;
+}
+
 void CodeGenDAGPatterns::ParsePatterns() {
   std::vector<Record*> Patterns = Records.getAllDerivedDefinitions("Pattern");

@@ -4009,26 +4027,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
                                   InstImpResults);

     // Promote the xform function to be an explicit node if set.
-    TreePatternNode *DstPattern = Result.getOnlyTree();
-    std::vector<TreePatternNode*> ResultNodeOperands;
-    for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
-      TreePatternNode *OpNode = DstPattern->getChild(ii);
-      if (Record *Xform = OpNode->getTransformFn()) {
-        OpNode->setTransformFn(nullptr);
-        std::vector<TreePatternNode*> Children;
-        Children.push_back(OpNode);
-        OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
-      }
-      ResultNodeOperands.push_back(OpNode);
-    }
-    DstPattern = Result.getOnlyTree();
-    if (!DstPattern->isLeaf())
-      DstPattern = new TreePatternNode(DstPattern->getOperator(),
-                                       ResultNodeOperands,
-                                       DstPattern->getNumTypes());
-
-    for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
-      DstPattern->setType(i, Result.getOnlyTree()->getExtType(i));
+    TreePatternNode* DstPattern = PromoteXForms(Result.getOnlyTree());

     TreePattern Temp(Result.getRecord(), DstPattern, false, *this);
     Temp.InferAllTypes();
--
1.8.3.1