llvm.org GIT mirror: llvm / 34e11f6

AMDGPU/GlobalISel: Legalize fast unsafe FDIV

Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69231

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375460 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Austin Kerbow

4 changed files with 891 additions and 8 deletions.
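In rough terms, the patch marks G_FDIV as custom-legalized and, when global unsafe-fp-math or the instruction's arcp flag permits it, rewrites x / y into x * rcp(y) using the amdgcn.rcp intrinsic (with 1/x and -1/x special cases). The sketch below condenses that rewrite; the free-function form and the name lowerUnsafeFDIVSketch are illustrative only, while the builder calls match the ones used in legalizeFastUnsafeFDIV further down.

// Condensed sketch of the new lowering, not the patch verbatim. Assumes the
// usual GlobalISel headers (MachineIRBuilder.h, MachineRegisterInfo.h) and
// that MI is a G_FDIV that already passed the unsafe/denormal checks.
static bool lowerUnsafeFDIVSketch(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT ResTy = MRI.getType(Res);
  uint16_t Flags = MI.getFlags();  // keeps arcp/nnan/... on the new instructions

  // x / y -> x * (1.0 / y), with the reciprocal done by the hardware RCP.
  auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
                 .addUse(RHS)
                 .setMIFlags(Flags);
  B.buildFMul(Res, LHS, RCP, Flags);
  MI.eraseFromParent();
  return true;
}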
MachineIRBuilder.h:

   }
 
   /// Build and insert \p Res = G_FNEG \p Op0
-  MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) {
-    return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0});
+  MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0,
+                                Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags);
   }
 
   /// Build and insert \p Res = G_FABS \p Op0
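The only MachineIRBuilder change is that buildFNeg now threads an optional flags operand through to buildInstr, so fast-math flags survive the -1/x special case below. A minimal, hypothetical call site (B, MI and Src are assumed to come from the surrounding legalizer code):

// Hypothetical use; the uint16_t flags value converts to Optional<unsigned>.
uint16_t Flags = MI.getFlags();                      // e.g. MachineInstr::FmArcp
auto Neg = B.buildFNeg(LLT::scalar(32), Src, Flags); // G_FNEG carries the flags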
AMDGPULegalizerInfo.cpp:

     .legalFor({S32, S64});
   auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
     .customFor({S32, S64});
+  auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV)
+    .customFor({S32, S64});
 
   if (ST.has16BitInsts()) {
     if (ST.hasVOP3PInsts())
...
       FPOpActions.legalFor({S16});
 
     TrigActions.customFor({S16});
+    FDIVActions.customFor({S16});
   }
 
   auto &MinNumMaxNum = getActionDefinitionsBuilder({
...
     .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
 
   TrigActions
+    .scalarize(0)
+    .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
+
+  FDIVActions
     .scalarize(0)
     .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
 
     return legalizeLoad(MI, MRI, B, Observer);
   case TargetOpcode::G_FMAD:
     return legalizeFMad(MI, MRI, B);
+  case TargetOpcode::G_FDIV:
+    return legalizeFDIV(MI, MRI, B);
   default:
     return false;
   }
     return false;
   }
 
-bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI,
-                                           MachineRegisterInfo &MRI,
-                                           MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
+                                       MachineRegisterInfo &MRI,
+                                       MachineIRBuilder &B) const {
+  B.setInstr(MI);
+
+  if (legalizeFastUnsafeFDIV(MI, MRI, B))
+    return true;
+
+  return false;
+}
+
+bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
+                                                 MachineRegisterInfo &MRI,
+                                                 MachineIRBuilder &B) const {
+  Register Res = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  uint16_t Flags = MI.getFlags();
+
+  LLT ResTy = MRI.getType(Res);
+  LLT S32 = LLT::scalar(32);
+  LLT S64 = LLT::scalar(64);
+
+  const MachineFunction &MF = B.getMF();
+  bool Unsafe =
+      MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp);
+
+  if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64)
+    return false;
+
+  if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals())
+    return false;
+
+  if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
+    // 1 / x -> RCP(x)
+    if (CLHS->isExactlyValue(1.0)) {
+      B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
+        .addUse(RHS)
+        .setMIFlags(Flags);
+
+      MI.eraseFromParent();
+      return true;
+    }
+
+    // -1 / x -> RCP( FNEG(x) )
+    if (CLHS->isExactlyValue(-1.0)) {
+      auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
+      B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
+        .addUse(FNeg.getReg(0))
+        .setMIFlags(Flags);
+
+      MI.eraseFromParent();
+      return true;
+    }
+  }
+
+  // x / y -> x * (1.0 / y)
+  if (Unsafe) {
+    auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
+      .addUse(RHS)
+      .setMIFlags(Flags);
+    B.buildFMul(Res, LHS, RCP, Flags);
+
+    MI.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
+bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
+                                                 MachineRegisterInfo &MRI,
+                                                 MachineIRBuilder &B) const {
   B.setInstr(MI);
   Register Res = MI.getOperand(0).getReg();
   Register LHS = MI.getOperand(2).getReg();
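Reading the two early-outs together: s64 divides are only rewritten when the whole module is compiled with unsafe-fp-math (a per-instruction arcp flag is not enough), and s32 divides are skipped when FP32 denormals are enabled unless unsafe math applies. The predicate below restates just that gating with illustrative names; it is not part of the patch.

// Restates the early-outs in legalizeFastUnsafeFDIV as a standalone check.
// ST, MF, MI and ResTy mirror the variables used in the function above.
static bool passesUnsafeFDIVEarlyOuts(const GCNSubtarget &ST,
                                      const MachineFunction &MF,
                                      const MachineInstr &MI, LLT ResTy) {
  const bool GlobalUnsafe = MF.getTarget().Options.UnsafeFPMath;
  const bool Unsafe = GlobalUnsafe || MI.getFlag(MachineInstr::FmArcp);

  if (!GlobalUnsafe && ResTy == LLT::scalar(64))
    return false;                  // s64: global unsafe math is required

  if (!Unsafe && ResTy == LLT::scalar(32) && ST.hasFP32Denormals())
    return false;                  // s32 with denormals: keep the precise path

  return true;                     // 1/x, -1/x and x*rcp(y) rewrites may apply
}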
     return legalizePreloadedArgIntrin(MI, MRI, B,
                                       AMDGPUFunctionArgInfo::DISPATCH_ID);
   case Intrinsic::amdgcn_fdiv_fast:
-    return legalizeFDIVFast(MI, MRI, B);
+    return legalizeFDIVFastIntrin(MI, MRI, B);
   case Intrinsic::amdgcn_is_shared:
     return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS);
   case Intrinsic::amdgcn_is_private:
AMDGPULegalizerInfo.h:

       MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
       AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
 
-  bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI,
-                        MachineIRBuilder &B) const;
+  bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
+                    MachineIRBuilder &B) const;
+  bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &B) const;
+  bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &B) const;
 
   bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
                               MachineIRBuilder &B) const;
New MIR legalizer test added by this patch (shown in full below):

0 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
2 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
3 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
4 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -enable-unsafe-fp-math -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s
5
6 ---
7 name: test_fast_unsafe_fdiv_s16
8 body: |
9 bb.0:
10 liveins: $vgpr0, $vgpr1
11
12 ; SI-LABEL: name: test_fast_unsafe_fdiv_s16
13 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
14 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
15 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
16 ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
17 ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
18 ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
19 ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]]
20 ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV]](s32)
21 ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
22 ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
23 ; VI-LABEL: name: test_fast_unsafe_fdiv_s16
24 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
25 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
26 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
27 ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
28 ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[TRUNC]], [[TRUNC1]]
29 ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FDIV]](s16)
30 ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
31 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s16
32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
33 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
34 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
35 ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
36 ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[TRUNC]], [[TRUNC1]]
37 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FDIV]](s16)
38 ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
39 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s16
40 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
41 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
42 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
43 ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
44 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC1]](s16)
45 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]]
46 ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16)
47 ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32)
48 %0:_(s32) = COPY $vgpr0
49 %1:_(s32) = COPY $vgpr1
50 %2:_(s16) = G_TRUNC %0
51 %3:_(s16) = G_TRUNC %1
52 %4:_(s16) = G_FDIV %2, %3
53 %5:_(s32) = G_ANYEXT %4
54 $vgpr0 = COPY %5
55 ...
56
57 ---
58 name: test_fast_unsafe_fdiv_s32
59 body: |
60 bb.0:
61 liveins: $vgpr0, $vgpr1
62
63 ; SI-LABEL: name: test_fast_unsafe_fdiv_s32
64 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
65 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
66 ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]]
67 ; SI: $vgpr0 = COPY [[FDIV]](s32)
68 ; VI-LABEL: name: test_fast_unsafe_fdiv_s32
69 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
70 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
71 ; VI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]]
72 ; VI: $vgpr0 = COPY [[FDIV]](s32)
73 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32
74 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
75 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
76 ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]]
77 ; GFX9: $vgpr0 = COPY [[FDIV]](s32)
78 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32
79 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
80 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
81 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
82 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]]
83 ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32)
84 %0:_(s32) = COPY $vgpr0
85 %1:_(s32) = COPY $vgpr1
86 %2:_(s32) = G_FDIV %0, %1
87 $vgpr0 = COPY %2
88 ...
89
90 ---
91 name: test_fast_unsafe_fdiv_s32_arcp
92 body: |
93 bb.0:
94 liveins: $vgpr0, $vgpr1
95
96 ; SI-LABEL: name: test_fast_unsafe_fdiv_s32_arcp
97 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
98 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
99 ; SI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
100 ; SI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
101 ; SI: $vgpr0 = COPY [[FMUL]](s32)
102 ; VI-LABEL: name: test_fast_unsafe_fdiv_s32_arcp
103 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
104 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
105 ; VI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
106 ; VI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
107 ; VI: $vgpr0 = COPY [[FMUL]](s32)
108 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32_arcp
109 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
110 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
111 ; GFX9: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
112 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
113 ; GFX9: $vgpr0 = COPY [[FMUL]](s32)
114 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32_arcp
115 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
116 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
117 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32)
118 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]]
119 ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32)
120 %0:_(s32) = COPY $vgpr0
121 %1:_(s32) = COPY $vgpr1
122 %2:_(s32) = arcp G_FDIV %0, %1
123 $vgpr0 = COPY %2
124 ...
125
126 ---
127 name: test_fast_unsafe_fdiv_s64
128 body: |
129 bb.0:
130 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
131
132 ; SI-LABEL: name: test_fast_unsafe_fdiv_s64
133 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
134 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
135 ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]]
136 ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
137 ; VI-LABEL: name: test_fast_unsafe_fdiv_s64
138 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
139 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
140 ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]]
141 ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
142 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s64
143 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
144 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
145 ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]]
146 ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
147 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s64
148 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
149 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
150 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s64)
151 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[INT]]
152 ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMUL]](s64)
153 %0:_(s64) = COPY $vgpr0_vgpr1
154 %1:_(s64) = COPY $vgpr2_vgpr3
155 %2:_(s64) = G_FDIV %0, %1
156 $vgpr0_vgpr1 = COPY %2
157 ...
158
159 ---
160 name: test_fast_unsafe_fdiv_v2s32
161 body: |
162 bb.0:
163 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
164
165 ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s32
166 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
167 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
168 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
169 ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
170 ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV2]]
171 ; SI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV3]]
172 ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32)
173 ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
174 ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s32
175 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
176 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
177 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
178 ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
179 ; VI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV2]]
180 ; VI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV3]]
181 ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32)
182 ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
183 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s32
184 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
185 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
186 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
187 ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
188 ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV2]]
189 ; GFX9: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV3]]
190 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32)
191 ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
192 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s32
193 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
194 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
195 ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
196 ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
197 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32)
198 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]]
199 ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32)
200 ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]]
201 ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32)
202 ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
203 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
204 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
205 %2:_(<2 x s32>) = G_FDIV %0, %1
206 $vgpr0_vgpr1 = COPY %2
207 ...
208
209 ---
210 name: test_fast_unsafe_fdiv_v2s32_flags
211 body: |
212 bb.0:
213 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
214
215 ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags
216 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
217 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
218 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
219 ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
220 ; SI: [[FDIV:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV]], [[UV2]]
221 ; SI: [[FDIV1:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV1]], [[UV3]]
222 ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32)
223 ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
224 ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags
225 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
226 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
227 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
228 ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
229 ; VI: [[FDIV:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV]], [[UV2]]
230 ; VI: [[FDIV1:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV1]], [[UV3]]
231 ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32)
232 ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
233 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags
234 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
235 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
236 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
237 ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
238 ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV]], [[UV2]]
239 ; GFX9: [[FDIV1:%[0-9]+]]:_(s32) = nnan G_FDIV [[UV1]], [[UV3]]
240 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32)
241 ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
242 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s32_flags
243 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
244 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
245 ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
246 ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
247 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32)
248 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[INT]]
249 ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32)
250 ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[INT1]]
251 ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32)
252 ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
253 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
254 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
255 %2:_(<2 x s32>) = nnan G_FDIV %0, %1
256 $vgpr0_vgpr1 = COPY %2
257 ...
258
259 ---
260 name: test_fast_unsafe_fdiv_v3s32
261 body: |
262 bb.0:
263 liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
264
265 ; SI-LABEL: name: test_fast_unsafe_fdiv_v3s32
266 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
267 ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
268 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
269 ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
270 ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV3]]
271 ; SI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV4]]
272 ; SI: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[UV2]], [[UV5]]
273 ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32), [[FDIV2]](s32)
274 ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
275 ; VI-LABEL: name: test_fast_unsafe_fdiv_v3s32
276 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
277 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
278 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
279 ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
280 ; VI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV3]]
281 ; VI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV4]]
282 ; VI: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[UV2]], [[UV5]]
283 ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32), [[FDIV2]](s32)
284 ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
285 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v3s32
286 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
287 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
288 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
289 ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
290 ; GFX9: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[UV]], [[UV3]]
291 ; GFX9: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[UV1]], [[UV4]]
292 ; GFX9: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[UV2]], [[UV5]]
293 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FDIV]](s32), [[FDIV1]](s32), [[FDIV2]](s32)
294 ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
295 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v3s32
296 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
297 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
298 ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
299 ; GFX9-UNSAFE: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
300 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32)
301 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]]
302 ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](s32)
303 ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]]
304 ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](s32)
305 ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[INT2]]
306 ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32)
307 ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
308 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
309 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
310 %2:_(<3 x s32>) = G_FDIV %0, %1
311 $vgpr0_vgpr1_vgpr2 = COPY %2
312 ...
313
314 ---
315 name: test_fast_unsafe_fdiv_v2s64
316 body: |
317 bb.0:
318 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
319
320 ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s64
321 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
322 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
323 ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
324 ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
325 ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]]
326 ; SI: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]]
327 ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64)
328 ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
329 ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s64
330 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
331 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
332 ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
333 ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
334 ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]]
335 ; VI: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]]
336 ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64)
337 ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
338 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s64
339 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
340 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
341 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
342 ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
343 ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]]
344 ; GFX9: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]]
345 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64)
346 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
347 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s64
348 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
349 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
350 ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
351 ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
352 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s64)
353 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[INT]]
354 ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s64)
355 ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[INT1]]
356 ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64)
357 ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
358 %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
359 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
360 %2:_(<2 x s64>) = G_FDIV %0, %1
361 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
362 ...
363
364 ---
365 name: test_fast_unsafe_fdiv_v2s16
366 body: |
367 bb.0:
368 liveins: $vgpr0, $vgpr1
369
370 ; SI-LABEL: name: test_fast_unsafe_fdiv_v2s16
371 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
372 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
373 ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
374 ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
375 ; SI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV2]]
376 ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
377 ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
378 ; SI: [[FDIV1:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]]
379 ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV1]](s32)
380 ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FPTRUNC]](s16)
381 ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
382 ; VI-LABEL: name: test_fast_unsafe_fdiv_v2s16
383 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
384 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
385 ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
386 ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
387 ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV2]]
388 ; VI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV3]]
389 ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16)
390 ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
391 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v2s16
392 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
393 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
394 ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
395 ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
396 ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV2]]
397 ; GFX9: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV3]]
398 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16)
399 ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
400 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v2s16
401 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
402 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
403 ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
404 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
405 ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
406 ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
407 ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
408 ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
409 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
410 ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
411 ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
412 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC2]](s16)
413 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]]
414 ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16)
415 ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]]
416 ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16)
417 ; GFX9-UNSAFE: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
418 %0:_(<2 x s16>) = COPY $vgpr0
419 %1:_(<2 x s16>) = COPY $vgpr1
420 %2:_(<2 x s16>) = G_FDIV %0, %1
421 $vgpr0 = COPY %2
422 ...
423
424 ---
425 name: test_fast_unsafe_fdiv_v3s16
426 body: |
427 bb.0:
428 liveins: $vgpr0, $vgpr1
429
430 ; SI-LABEL: name: test_fast_unsafe_fdiv_v3s16
431 ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
432 ; SI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
433 ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
434 ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>)
435 ; SI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV3]]
436 ; SI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV4]]
437 ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
438 ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
439 ; SI: [[FDIV2:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]]
440 ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV2]](s32)
441 ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FPTRUNC]](s16)
442 ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
443 ; VI-LABEL: name: test_fast_unsafe_fdiv_v3s16
444 ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
445 ; VI: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
446 ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
447 ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>)
448 ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV3]]
449 ; VI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV4]]
450 ; VI: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV5]]
451 ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16)
452 ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
453 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v3s16
454 ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
455 ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
456 ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
457 ; GFX9: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<3 x s16>)
458 ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV3]]
459 ; GFX9: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV4]]
460 ; GFX9: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV5]]
461 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16)
462 ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
463 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v3s16
464 ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
465 ; GFX9-UNSAFE: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
466 ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
467 ; GFX9-UNSAFE: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
468 ; GFX9-UNSAFE: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
469 ; GFX9-UNSAFE: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
470 ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
471 ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
472 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
473 ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
474 ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
475 ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
476 ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
477 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
478 ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
479 ; GFX9-UNSAFE: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
480 ; GFX9-UNSAFE: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0
481 ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
482 ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
483 ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
484 ; GFX9-UNSAFE: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
485 ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
486 ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
487 ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
488 ; GFX9-UNSAFE: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
489 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16)
490 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]]
491 ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16)
492 ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]]
493 ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16)
494 ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]]
495 ; GFX9-UNSAFE: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
496 ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16)
497 ; GFX9-UNSAFE: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[DEF4]](s16)
498 ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
499 ; GFX9-UNSAFE: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
500 ; GFX9-UNSAFE: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>)
501 %0:_(<3 x s16>) = G_IMPLICIT_DEF
502 %1:_(<3 x s16>) = G_IMPLICIT_DEF
503 %2:_(<3 x s16>) = G_FDIV %0, %1
504 S_NOP 0, implicit %2
505 ...
506
507 ---
508 name: test_fast_unsafe_fdiv_v4s16
509 body: |
510 bb.0:
511 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
512
513 ; SI-LABEL: name: test_fast_unsafe_fdiv_v4s16
514 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
515 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
516 ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
517 ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
518 ; SI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV4]]
519 ; SI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV5]]
520 ; SI: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV6]]
521 ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
522 ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
523 ; SI: [[FDIV3:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]]
524 ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV3]](s32)
525 ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16), [[FPTRUNC]](s16)
526 ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
527 ; VI-LABEL: name: test_fast_unsafe_fdiv_v4s16
528 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
529 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
530 ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
531 ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
532 ; VI: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV4]]
533 ; VI: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV5]]
534 ; VI: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV6]]
535 ; VI: [[FDIV3:%[0-9]+]]:_(s16) = G_FDIV [[UV3]], [[UV7]]
536 ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16), [[FDIV3]](s16)
537 ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
538 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_v4s16
539 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
540 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
541 ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
542 ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
543 ; GFX9: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[UV]], [[UV4]]
544 ; GFX9: [[FDIV1:%[0-9]+]]:_(s16) = G_FDIV [[UV1]], [[UV5]]
545 ; GFX9: [[FDIV2:%[0-9]+]]:_(s16) = G_FDIV [[UV2]], [[UV6]]
546 ; GFX9: [[FDIV3:%[0-9]+]]:_(s16) = G_FDIV [[UV3]], [[UV7]]
547 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FDIV]](s16), [[FDIV1]](s16), [[FDIV2]](s16), [[FDIV3]](s16)
548 ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
549 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_v4s16
550 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
551 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
552 ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
553 ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
554 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
555 ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
556 ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
557 ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
558 ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
559 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
560 ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
561 ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
562 ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
563 ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
564 ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
565 ; GFX9-UNSAFE: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
566 ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
567 ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
568 ; GFX9-UNSAFE: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
569 ; GFX9-UNSAFE: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
570 ; GFX9-UNSAFE: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
571 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16)
572 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]]
573 ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16)
574 ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]]
575 ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC6]](s16)
576 ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]]
577 ; GFX9-UNSAFE: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16)
578 ; GFX9-UNSAFE: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]]
579 ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16)
580 ; GFX9-UNSAFE: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[FMUL3]](s16)
581 ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
582 ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
583 %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
584 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
585 %2:_(<4 x s16>) = G_FDIV %0, %1
586 $vgpr0_vgpr1 = COPY %2
587 ...
588
589 ---
590 name: test_fast_unsafe_fdiv_s16_constant_one_rcp
591 body: |
592 bb.0:
593 liveins: $vgpr0
594
595 ; SI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp
596 ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
597 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
598 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
599 ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
600 ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
601 ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]]
602 ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV]](s32)
603 ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
604 ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
605 ; VI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp
606 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
607 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
608 ; VI: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16)
609 ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
610 ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
611 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp
612 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
613 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
614 ; GFX9: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16)
615 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
616 ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
617 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s16_constant_one_rcp
618 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
619 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
620 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16)
621 ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
622 ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32)
623 %0:_(s16) = G_FCONSTANT half 1.0
624 %1:_(s32) = COPY $vgpr0
625 %2:_(s16) = G_TRUNC %1
626 %3:_(s16) = G_FDIV %0, %2
627 %4:_(s32) = G_ANYEXT %3
628 $vgpr0 = COPY %4
629 ...
630
631 ---
632 name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp
633 body: |
634 bb.0:
635 liveins: $vgpr0
636
637 ; SI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp
638 ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00
639 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
640 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
641 ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16)
642 ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
643 ; SI: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]]
644 ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FDIV]](s32)
645 ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
646 ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
647 ; VI-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp
648 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
649 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
650 ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
651 ; VI: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16)
652 ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
653 ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
654 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp
655 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
656 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
657 ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
658 ; GFX9: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16)
659 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
660 ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
661 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s16_constant_negative_one_rcp
662 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
663 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
664 ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
665 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16)
666 ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
667 ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32)
668 %0:_(s16) = G_FCONSTANT half -1.0
669 %1:_(s32) = COPY $vgpr0
670 %2:_(s16) = G_TRUNC %1
671 %3:_(s16) = G_FDIV %0, %2
672 %4:_(s32) = G_ANYEXT %3
673 $vgpr0 = COPY %4
674 ...
675
676 ---
677 name: test_fast_unsafe_fdiv_s32_constant_one_rcp
678 body: |
679 bb.0:
680 liveins: $vgpr0
681
682 ; SI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp
683 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
684 ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
685 ; SI: $vgpr0 = COPY [[INT]](s32)
686 ; VI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp
687 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
688 ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
689 ; VI: $vgpr0 = COPY [[INT]](s32)
690 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp
691 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
692 ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
693 ; GFX9: $vgpr0 = COPY [[INT]](s32)
694 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32_constant_one_rcp
695 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
696 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32)
697 ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32)
698 %0:_(s32) = G_FCONSTANT float 1.0
699 %1:_(s32) = COPY $vgpr0
700 %2:_(s32) = G_FDIV %0, %1
701 $vgpr0 = COPY %2
702 ...
703
704 ---
705 name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp
706 body: |
707 bb.0:
708 liveins: $vgpr0
709
710 ; SI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp
711 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
712 ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
713 ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
714 ; SI: $vgpr0 = COPY [[INT]](s32)
715 ; VI-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp
716 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
717 ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
718 ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
719 ; VI: $vgpr0 = COPY [[INT]](s32)
720 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp
721 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
722 ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
723 ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
724 ; GFX9: $vgpr0 = COPY [[INT]](s32)
725 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s32_constant_negative_one_rcp
726 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
727 ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
728 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
729 ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32)
730 %0:_(s32) = G_FCONSTANT float -1.0
731 %1:_(s32) = COPY $vgpr0
732 %2:_(s32) = G_FDIV %0, %1
733 $vgpr0 = COPY %2
734 ...
735
736 ---
737 name: test_fast_unsafe_fdiv_s64_constant_one_rcp
738 body: |
739 bb.0:
740 liveins: $vgpr0_vgpr1
741
742 ; SI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp
743 ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
744 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
745 ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
746 ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
747 ; VI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp
748 ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
749 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
750 ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
751 ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
752 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp
753 ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
754 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
755 ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
756 ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
757 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s64_constant_one_rcp
758 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
759 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64)
760 ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[INT]](s64)
761 %0:_(s64) = G_FCONSTANT double 1.0
762 %1:_(s64) = COPY $vgpr0_vgpr1
763 %2:_(s64) = G_FDIV %0, %1
764 $vgpr0_vgpr1 = COPY %2
765 ...
766
767 ---
768 name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp
769 body: |
770 bb.0:
771 liveins: $vgpr0_vgpr1
772
773 ; SI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp
774 ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00
775 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
776 ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
777 ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
778 ; VI-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp
779 ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00
780 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
781 ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
782 ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
783 ; GFX9-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp
784 ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00
785 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
786 ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
787 ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
788 ; GFX9-UNSAFE-LABEL: name: test_fast_unsafe_fdiv_s64_constant_negative_one_rcp
789 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
790 ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]]
791 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s64)
792 ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[INT]](s64)
793 %0:_(s64) = G_FCONSTANT double -1.0
794 %1:_(s64) = COPY $vgpr0_vgpr1
795 %2:_(s64) = G_FDIV %0, %1
796 $vgpr0_vgpr1 = COPY %2
797 ...