llvm.org GIT mirror llvm / d79c539
[UpdateTestChecks] Add update_analyze_test_checks.py for cost model analysis generation. The script allows the auto-generation of checks for cost model tests to speed up their creation and help improve coverage, which will help a lot with PR36550. If the need arises we can add support for other analyze passes as well, but the cost model was the one I needed to get done - at the moment it just warns that any other analysis mode is unsupported. I've regenerated a couple of x86 test files to show the effect. Differential Revision: https://reviews.llvm.org/D45272 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@329390 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 1 year, 6 months ago
5 changed file(s) with 1742 addition(s) and 1103 deletion(s). Raw diff Collapse all Expand all
None ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
1 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
2 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
4 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
5 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
6 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
7 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GLM
0 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
1 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
2 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
3 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx,+fma | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
4 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2,+fma | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
5 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
6 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
7 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
8 ; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
89
910 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
1011 target triple = "x86_64-apple-macosx10.8.0"
1112
12 ; CHECK-LABEL: 'fadd'
1313 define i32 @fadd(i32 %arg) {
14 ; SSE2: cost of 2 {{.*}} %F32 = fadd
15 ; SSE42: cost of 1 {{.*}} %F32 = fadd
16 ; AVX: cost of 1 {{.*}} %F32 = fadd
17 ; AVX2: cost of 1 {{.*}} %F32 = fadd
18 ; AVX512: cost of 1 {{.*}} %F32 = fadd
19 ; SLM: cost of 1 {{.*}} %F32 = fadd
20 ; GLM: cost of 1 {{.*}} %F32 = fadd
14 ; SSE2-LABEL: 'fadd'
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
17 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
18 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> undef, undef
19 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double undef, undef
20 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef
21 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
22 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> undef, undef
23 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
24 ;
25 ; SSE42-LABEL: 'fadd'
26 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
27 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
28 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
29 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
30 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
31 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
33 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
34 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
35 ;
36 ; AVX1-LABEL: 'fadd'
37 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
41 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
42 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
43 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
44 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
45 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
46 ;
47 ; AVX2-LABEL: 'fadd'
48 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
49 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
50 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef
51 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fadd <16 x float> undef, undef
52 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
53 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
54 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef
55 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fadd <8 x double> undef, undef
56 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
57 ;
58 ; AVX512-LABEL: 'fadd'
59 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
60 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
61 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef
62 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fadd <16 x float> undef, undef
63 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
65 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef
66 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fadd <8 x double> undef, undef
67 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
68 ;
69 ; SLM-LABEL: 'fadd'
70 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
71 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
72 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
73 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
74 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
75 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef
76 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
77 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> undef, undef
78 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
79 ;
80 ; GLM-LABEL: 'fadd'
81 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
82 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
83 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
84 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
85 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
86 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
87 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
88 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
89 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
90 ;
2191 %F32 = fadd float undef, undef
22 ; SSE2: cost of 2 {{.*}} %V4F32 = fadd
23 ; SSE42: cost of 1 {{.*}} %V4F32 = fadd
24 ; AVX: cost of 1 {{.*}} %V4F32 = fadd
25 ; AVX2: cost of 1 {{.*}} %V4F32 = fadd
26 ; AVX512: cost of 1 {{.*}} %V4F32 = fadd
27 ; SLM: cost of 1 {{.*}} %V4F32 = fadd
28 ; GLM: cost of 1 {{.*}} %V4F32 = fadd
2992 %V4F32 = fadd <4 x float> undef, undef
30 ; SSE2: cost of 4 {{.*}} %V8F32 = fadd
31 ; SSE42: cost of 2 {{.*}} %V8F32 = fadd
32 ; AVX: cost of 2 {{.*}} %V8F32 = fadd
33 ; AVX2: cost of 1 {{.*}} %V8F32 = fadd
34 ; AVX512: cost of 1 {{.*}} %V8F32 = fadd
35 ; SLM: cost of 2 {{.*}} %V8F32 = fadd
36 ; GLM: cost of 2 {{.*}} %V8F32 = fadd
3793 %V8F32 = fadd <8 x float> undef, undef
38 ; SSE2: cost of 8 {{.*}} %V16F32 = fadd
39 ; SSE42: cost of 4 {{.*}} %V16F32 = fadd
40 ; AVX: cost of 4 {{.*}} %V16F32 = fadd
41 ; AVX2: cost of 2 {{.*}} %V16F32 = fadd
42 ; AVX512: cost of 1 {{.*}} %V16F32 = fadd
43 ; SLM: cost of 4 {{.*}} %V16F32 = fadd
44 ; GLM: cost of 4 {{.*}} %V16F32 = fadd
4594 %V16F32 = fadd <16 x float> undef, undef
4695
47 ; SSE2: cost of 2 {{.*}} %F64 = fadd
48 ; SSE42: cost of 1 {{.*}} %F64 = fadd
49 ; AVX: cost of 1 {{.*}} %F64 = fadd
50 ; AVX2: cost of 1 {{.*}} %F64 = fadd
51 ; AVX512: cost of 1 {{.*}} %F64 = fadd
52 ; SLM: cost of 1 {{.*}} %F64 = fadd
53 ; GLM: cost of 1 {{.*}} %F64 = fadd
5496 %F64 = fadd double undef, undef
55 ; SSE2: cost of 2 {{.*}} %V2F64 = fadd
56 ; SSE42: cost of 1 {{.*}} %V2F64 = fadd
57 ; AVX: cost of 1 {{.*}} %V2F64 = fadd
58 ; AVX2: cost of 1 {{.*}} %V2F64 = fadd
59 ; AVX512: cost of 1 {{.*}} %V2F64 = fadd
60 ; SLM: cost of 2 {{.*}} %V2F64 = fadd
61 ; GLM: cost of 1 {{.*}} %V2F64 = fadd
6297 %V2F64 = fadd <2 x double> undef, undef
63 ; SSE2: cost of 4 {{.*}} %V4F64 = fadd
64 ; SSE42: cost of 2 {{.*}} %V4F64 = fadd
65 ; AVX: cost of 2 {{.*}} %V4F64 = fadd
66 ; AVX2: cost of 1 {{.*}} %V4F64 = fadd
67 ; AVX512: cost of 1 {{.*}} %V4F64 = fadd
68 ; SLM: cost of 4 {{.*}} %V4F64 = fadd
69 ; GLM: cost of 2 {{.*}} %V4F64 = fadd
7098 %V4F64 = fadd <4 x double> undef, undef
71 ; SSE2: cost of 8 {{.*}} %V8F64 = fadd
72 ; SSE42: cost of 4 {{.*}} %V8F64 = fadd
73 ; AVX: cost of 4 {{.*}} %V8F64 = fadd
74 ; AVX2: cost of 2 {{.*}} %V8F64 = fadd
75 ; AVX512: cost of 1 {{.*}} %V8F64 = fadd
76 ; SLM: cost of 8 {{.*}} %V8F64 = fadd
77 ; GLM: cost of 4 {{.*}} %V8F64 = fadd
7899 %V8F64 = fadd <8 x double> undef, undef
79100
80101 ret i32 undef
81102 }
82103
83 ; CHECK-LABEL: 'fsub'
84104 define i32 @fsub(i32 %arg) {
85 ; SSE2: cost of 2 {{.*}} %F32 = fsub
86 ; SSE42: cost of 1 {{.*}} %F32 = fsub
87 ; AVX: cost of 1 {{.*}} %F32 = fsub
88 ; AVX2: cost of 1 {{.*}} %F32 = fsub
89 ; AVX512: cost of 1 {{.*}} %F32 = fsub
90 ; SLM: cost of 1 {{.*}} %F32 = fsub
91 ; GLM: cost of 1 {{.*}} %F32 = fsub
105 ; SSE2-LABEL: 'fsub'
106 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef
107 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
108 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
109 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> undef, undef
110 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double undef, undef
111 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef
112 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
113 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> undef, undef
114 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
115 ;
116 ; SSE42-LABEL: 'fsub'
117 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
118 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
119 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
120 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
121 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
122 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
123 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
124 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
125 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
126 ;
127 ; AVX1-LABEL: 'fsub'
128 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
129 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
130 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
131 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
132 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
133 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
134 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
135 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
136 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
137 ;
138 ; AVX2-LABEL: 'fsub'
139 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
140 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
141 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef
142 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> undef, undef
143 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
144 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
145 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef
146 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> undef, undef
147 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
148 ;
149 ; AVX512-LABEL: 'fsub'
150 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
151 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
152 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef
153 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> undef, undef
154 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
155 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
156 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef
157 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> undef, undef
158 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
159 ;
160 ; SLM-LABEL: 'fsub'
161 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
162 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
163 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
164 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
165 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
166 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef
167 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
168 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> undef, undef
169 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
170 ;
171 ; GLM-LABEL: 'fsub'
172 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
173 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
174 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
175 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
176 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
177 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
178 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
179 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
180 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
181 ;
92182 %F32 = fsub float undef, undef
93 ; SSE2: cost of 2 {{.*}} %V4F32 = fsub
94 ; SSE42: cost of 1 {{.*}} %V4F32 = fsub
95 ; AVX: cost of 1 {{.*}} %V4F32 = fsub
96 ; AVX2: cost of 1 {{.*}} %V4F32 = fsub
97 ; AVX512: cost of 1 {{.*}} %V4F32 = fsub
98 ; SLM: cost of 1 {{.*}} %V4F32 = fsub
99 ; GLM: cost of 1 {{.*}} %V4F32 = fsub
100183 %V4F32 = fsub <4 x float> undef, undef
101 ; SSE2: cost of 4 {{.*}} %V8F32 = fsub
102 ; SSE42: cost of 2 {{.*}} %V8F32 = fsub
103 ; AVX: cost of 2 {{.*}} %V8F32 = fsub
104 ; AVX2: cost of 1 {{.*}} %V8F32 = fsub
105 ; AVX512: cost of 1 {{.*}} %V8F32 = fsub
106 ; SLM: cost of 2 {{.*}} %V8F32 = fsub
107 ; GLM: cost of 2 {{.*}} %V8F32 = fsub
108184 %V8F32 = fsub <8 x float> undef, undef
109 ; SSE2: cost of 8 {{.*}} %V16F32 = fsub
110 ; SSE42: cost of 4 {{.*}} %V16F32 = fsub
111 ; AVX: cost of 4 {{.*}} %V16F32 = fsub
112 ; AVX2: cost of 2 {{.*}} %V16F32 = fsub
113 ; AVX512: cost of 1 {{.*}} %V16F32 = fsub
114 ; SLM: cost of 4 {{.*}} %V16F32 = fsub
115 ; GLM: cost of 4 {{.*}} %V16F32 = fsub
116185 %V16F32 = fsub <16 x float> undef, undef
117186
118 ; SSE2: cost of 2 {{.*}} %F64 = fsub
119 ; SSE42: cost of 1 {{.*}} %F64 = fsub
120 ; AVX: cost of 1 {{.*}} %F64 = fsub
121 ; AVX2: cost of 1 {{.*}} %F64 = fsub
122 ; AVX512: cost of 1 {{.*}} %F64 = fsub
123 ; SLM: cost of 1 {{.*}} %F64 = fsub
124 ; GLM: cost of 1 {{.*}} %F64 = fsub
125187 %F64 = fsub double undef, undef
126 ; SSE2: cost of 2 {{.*}} %V2F64 = fsub
127 ; SSE42: cost of 1 {{.*}} %V2F64 = fsub
128 ; AVX: cost of 1 {{.*}} %V2F64 = fsub
129 ; AVX2: cost of 1 {{.*}} %V2F64 = fsub
130 ; AVX512: cost of 1 {{.*}} %V2F64 = fsub
131 ; SLM: cost of 2 {{.*}} %V2F64 = fsub
132 ; GLM: cost of 1 {{.*}} %V2F64 = fsub
133188 %V2F64 = fsub <2 x double> undef, undef
134 ; SSE2: cost of 4 {{.*}} %V4F64 = fsub
135 ; SSE42: cost of 2 {{.*}} %V4F64 = fsub
136 ; AVX: cost of 2 {{.*}} %V4F64 = fsub
137 ; AVX2: cost of 1 {{.*}} %V4F64 = fsub
138 ; AVX512: cost of 1 {{.*}} %V4F64 = fsub
139 ; SLM: cost of 4 {{.*}} %V4F64 = fsub
140 ; GLM: cost of 2 {{.*}} %V4F64 = fsub
141189 %V4F64 = fsub <4 x double> undef, undef
142 ; SSE2: cost of 8 {{.*}} %V8F64 = fsub
143 ; SSE42: cost of 4 {{.*}} %V8F64 = fsub
144 ; AVX: cost of 4 {{.*}} %V8F64 = fsub
145 ; AVX2: cost of 2 {{.*}} %V8F64 = fsub
146 ; AVX512: cost of 1 {{.*}} %V8F64 = fsub
147 ; SLM: cost of 8 {{.*}} %V8F64 = fsub
148 ; GLM: cost of 4 {{.*}} %V8F64 = fsub
149190 %V8F64 = fsub <8 x double> undef, undef
150191
151192 ret i32 undef
152193 }
153194
154 ; CHECK-LABEL: 'fmul'
155195 define i32 @fmul(i32 %arg) {
156 ; SSE2: cost of 2 {{.*}} %F32 = fmul
157 ; SSE42: cost of 1 {{.*}} %F32 = fmul
158 ; AVX: cost of 1 {{.*}} %F32 = fmul
159 ; AVX2: cost of 1 {{.*}} %F32 = fmul
160 ; AVX512: cost of 1 {{.*}} %F32 = fmul
161 ; SLM: cost of 1 {{.*}} %F32 = fmul
162 ; GLM: cost of 1 {{.*}} %F32 = fmul
196 ; SSE2-LABEL: 'fmul'
197 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef
198 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
199 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef
200 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fmul <16 x float> undef, undef
201 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef
202 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef
203 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef
204 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> undef, undef
205 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
206 ;
207 ; SSE42-LABEL: 'fmul'
208 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
209 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
210 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
211 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
212 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
213 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
214 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
215 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
216 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
217 ;
218 ; AVX1-LABEL: 'fmul'
219 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
220 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
221 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
222 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
223 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
224 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
225 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
226 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
227 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
228 ;
229 ; AVX2-LABEL: 'fmul'
230 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
231 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
232 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef
233 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fmul <16 x float> undef, undef
234 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
235 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
236 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef
237 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fmul <8 x double> undef, undef
238 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
239 ;
240 ; AVX512-LABEL: 'fmul'
241 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
242 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
243 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef
244 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef
245 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
246 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
247 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef
248 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef
249 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
250 ;
251 ; SLM-LABEL: 'fmul'
252 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
253 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
254 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef
255 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fmul <16 x float> undef, undef
256 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef
257 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fmul <2 x double> undef, undef
258 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fmul <4 x double> undef, undef
259 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fmul <8 x double> undef, undef
260 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
261 ;
262 ; GLM-LABEL: 'fmul'
263 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
264 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
265 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
266 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
267 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
268 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
269 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
270 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
271 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
272 ;
163273 %F32 = fmul float undef, undef
164 ; SSE2: cost of 2 {{.*}} %V4F32 = fmul
165 ; SSE42: cost of 1 {{.*}} %V4F32 = fmul
166 ; AVX: cost of 1 {{.*}} %V4F32 = fmul
167 ; AVX2: cost of 1 {{.*}} %V4F32 = fmul
168 ; AVX512: cost of 1 {{.*}} %V4F32 = fmul
169 ; SLM: cost of 2 {{.*}} %V4F32 = fmul
170 ; GLM: cost of 1 {{.*}} %V4F32 = fmul
171274 %V4F32 = fmul <4 x float> undef, undef
172 ; SSE2: cost of 4 {{.*}} %V8F32 = fmul
173 ; SSE42: cost of 2 {{.*}} %V8F32 = fmul
174 ; AVX: cost of 2 {{.*}} %V8F32 = fmul
175 ; AVX2: cost of 1 {{.*}} %V8F32 = fmul
176 ; AVX512: cost of 1 {{.*}} %V8F32 = fmul
177 ; SLM: cost of 4 {{.*}} %V8F32 = fmul
178 ; GLM: cost of 2 {{.*}} %V8F32 = fmul
179275 %V8F32 = fmul <8 x float> undef, undef
180 ; SSE2: cost of 8 {{.*}} %V16F32 = fmul
181 ; SSE42: cost of 4 {{.*}} %V16F32 = fmul
182 ; AVX: cost of 4 {{.*}} %V16F32 = fmul
183 ; AVX2: cost of 2 {{.*}} %V16F32 = fmul
184 ; AVX512: cost of 1 {{.*}} %V16F32 = fmul
185 ; SLM: cost of 8 {{.*}} %V16F32 = fmul
186 ; GLM: cost of 4 {{.*}} %V16F32 = fmul
187276 %V16F32 = fmul <16 x float> undef, undef
188277
189 ; SSE2: cost of 2 {{.*}} %F64 = fmul
190 ; SSE42: cost of 1 {{.*}} %F64 = fmul
191 ; AVX: cost of 1 {{.*}} %F64 = fmul
192 ; AVX2: cost of 1 {{.*}} %F64 = fmul
193 ; AVX512: cost of 1 {{.*}} %F64 = fmul
194 ; SLM: cost of 2 {{.*}} %F64 = fmul
195 ; GLM: cost of 1 {{.*}} %F64 = fmul
196278 %F64 = fmul double undef, undef
197 ; SSE2: cost of 2 {{.*}} %V2F64 = fmul
198 ; SSE42: cost of 1 {{.*}} %V2F64 = fmul
199 ; AVX: cost of 1 {{.*}} %V2F64 = fmul
200 ; AVX2: cost of 1 {{.*}} %V2F64 = fmul
201 ; AVX512: cost of 1 {{.*}} %V2F64 = fmul
202 ; SLM: cost of 4 {{.*}} %V2F64 = fmul
203 ; GLM: cost of 1 {{.*}} %V2F64 = fmul
204279 %V2F64 = fmul <2 x double> undef, undef
205 ; SSE2: cost of 4 {{.*}} %V4F64 = fmul
206 ; SSE42: cost of 2 {{.*}} %V4F64 = fmul
207 ; AVX: cost of 2 {{.*}} %V4F64 = fmul
208 ; AVX2: cost of 1 {{.*}} %V4F64 = fmul
209 ; AVX512: cost of 1 {{.*}} %V4F64 = fmul
210 ; SLM: cost of 8 {{.*}} %V4F64 = fmul
211 ; GLM: cost of 2 {{.*}} %V4F64 = fmul
212280 %V4F64 = fmul <4 x double> undef, undef
213 ; SSE2: cost of 8 {{.*}} %V8F64 = fmul
214 ; SSE42: cost of 4 {{.*}} %V8F64 = fmul
215 ; AVX: cost of 4 {{.*}} %V8F64 = fmul
216 ; AVX2: cost of 2 {{.*}} %V8F64 = fmul
217 ; AVX512: cost of 1 {{.*}} %V8F64 = fmul
218 ; SLM: cost of 16 {{.*}} %V8F64 = fmul
219 ; GLM: cost of 4 {{.*}} %V8F64 = fmul
220281 %V8F64 = fmul <8 x double> undef, undef
221282
222283 ret i32 undef
223284 }
224285
225 ; CHECK-LABEL: 'fdiv'
226286 define i32 @fdiv(i32 %arg) {
227 ; SSE2: cost of 23 {{.*}} %F32 = fdiv
228 ; SSE42: cost of 14 {{.*}} %F32 = fdiv
229 ; AVX: cost of 14 {{.*}} %F32 = fdiv
230 ; AVX2: cost of 7 {{.*}} %F32 = fdiv
231 ; AVX512: cost of 7 {{.*}} %F32 = fdiv
232 ; SLM: cost of 17 {{.*}} %F32 = fdiv
233 ; GLM: cost of 18 {{.*}} %F32 = fdiv
287 ; SSE2-LABEL: 'fdiv'
288 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %F32 = fdiv float undef, undef
289 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
290 ; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
291 ; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
292 ; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
293 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
294 ; SSE2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
295 ; SSE2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
296 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
297 ;
298 ; SSE42-LABEL: 'fdiv'
299 ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
300 ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
301 ; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef
302 ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef
303 ; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
304 ; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
305 ; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef
306 ; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef
307 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
308 ;
309 ; AVX1-LABEL: 'fdiv'
310 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
311 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
312 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef
313 ; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef
314 ; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
315 ; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
316 ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef
317 ; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef
318 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
319 ;
320 ; AVX2-LABEL: 'fdiv'
321 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = fdiv float undef, undef
322 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fdiv <4 x float> undef, undef
323 ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fdiv <8 x float> undef, undef
324 ; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = fdiv <16 x float> undef, undef
325 ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = fdiv double undef, undef
326 ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = fdiv <2 x double> undef, undef
327 ; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = fdiv <4 x double> undef, undef
328 ; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = fdiv <8 x double> undef, undef
329 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
330 ;
331 ; AVX512-LABEL: 'fdiv'
332 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = fdiv float undef, undef
333 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fdiv <4 x float> undef, undef
334 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fdiv <8 x float> undef, undef
335 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fdiv <16 x float> undef, undef
336 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = fdiv double undef, undef
337 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = fdiv <2 x double> undef, undef
338 ; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = fdiv <4 x double> undef, undef
339 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fdiv <8 x double> undef, undef
340 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
341 ;
342 ; SLM-LABEL: 'fdiv'
343 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %F32 = fdiv float undef, undef
344 ; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
345 ; SLM-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
346 ; SLM-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
347 ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = fdiv double undef, undef
348 ; SLM-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
349 ; SLM-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
350 ; SLM-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
351 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
352 ;
353 ; GLM-LABEL: 'fdiv'
354 ; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = fdiv float undef, undef
355 ; GLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4F32 = fdiv <4 x float> undef, undef
356 ; GLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8F32 = fdiv <8 x float> undef, undef
357 ; GLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16F32 = fdiv <16 x float> undef, undef
358 ; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %F64 = fdiv double undef, undef
359 ; GLM-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V2F64 = fdiv <2 x double> undef, undef
360 ; GLM-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V4F64 = fdiv <4 x double> undef, undef
361 ; GLM-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V8F64 = fdiv <8 x double> undef, undef
362 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
363 ;
234364 %F32 = fdiv float undef, undef
235 ; SSE2: cost of 39 {{.*}} %V4F32 = fdiv
236 ; SSE42: cost of 14 {{.*}} %V4F32 = fdiv
237 ; AVX: cost of 14 {{.*}} %V4F32 = fdiv
238 ; AVX2: cost of 7 {{.*}} %V4F32 = fdiv
239 ; AVX512: cost of 7 {{.*}} %V4F32 = fdiv
240 ; SLM: cost of 39 {{.*}} %V4F32 = fdiv
241 ; GLM: cost of 35 {{.*}} %V4F32 = fdiv
242365 %V4F32 = fdiv <4 x float> undef, undef
243 ; SSE2: cost of 78 {{.*}} %V8F32 = fdiv
244 ; SSE42: cost of 28 {{.*}} %V8F32 = fdiv
245 ; AVX: cost of 28 {{.*}} %V8F32 = fdiv
246 ; AVX2: cost of 14 {{.*}} %V8F32 = fdiv
247 ; AVX512: cost of 14 {{.*}} %V8F32 = fdiv
248 ; SLM: cost of 78 {{.*}} %V8F32 = fdiv
249 ; GLM: cost of 70 {{.*}} %V8F32 = fdiv
250366 %V8F32 = fdiv <8 x float> undef, undef
251 ; SSE2: cost of 156 {{.*}} %V16F32 = fdiv
252 ; SSE42: cost of 56 {{.*}} %V16F32 = fdiv
253 ; AVX: cost of 56 {{.*}} %V16F32 = fdiv
254 ; AVX2: cost of 28 {{.*}} %V16F32 = fdiv
255 ; AVX512: cost of 2 {{.*}} %V16F32 = fdiv
256 ; SLM: cost of 156 {{.*}} %V16F32 = fdiv
257 ; GLM: cost of 140 {{.*}} %V16F32 = fdiv
258367 %V16F32 = fdiv <16 x float> undef, undef
259368
260 ; SSE2: cost of 38 {{.*}} %F64 = fdiv
261 ; SSE42: cost of 22 {{.*}} %F64 = fdiv
262 ; AVX: cost of 22 {{.*}} %F64 = fdiv
263 ; AVX2: cost of 14 {{.*}} %F64 = fdiv
264 ; AVX512: cost of 14 {{.*}} %F64 = fdiv
265 ; SLM: cost of 32 {{.*}} %F64 = fdiv
266 ; GLM: cost of 33 {{.*}} %F64 = fdiv
267369 %F64 = fdiv double undef, undef
268 ; SSE2: cost of 69 {{.*}} %V2F64 = fdiv
269 ; SSE42: cost of 22 {{.*}} %V2F64 = fdiv
270 ; AVX: cost of 22 {{.*}} %V2F64 = fdiv
271 ; AVX2: cost of 14 {{.*}} %V2F64 = fdiv
272 ; AVX512: cost of 14 {{.*}} %V2F64 = fdiv
273 ; SLM: cost of 69 {{.*}} %V2F64 = fdiv
274 ; GLM: cost of 65 {{.*}} %V2F64 = fdiv
275370 %V2F64 = fdiv <2 x double> undef, undef
276 ; SSE2: cost of 138 {{.*}} %V4F64 = fdiv
277 ; SSE42: cost of 44 {{.*}} %V4F64 = fdiv
278 ; AVX: cost of 44 {{.*}} %V4F64 = fdiv
279 ; AVX2: cost of 28 {{.*}} %V4F64 = fdiv
280 ; AVX512: cost of 28 {{.*}} %V4F64 = fdiv
281 ; SLM: cost of 138 {{.*}} %V4F64 = fdiv
282 ; GLM: cost of 130 {{.*}} %V4F64 = fdiv
283371 %V4F64 = fdiv <4 x double> undef, undef
284 ; SSE2: cost of 276 {{.*}} %V8F64 = fdiv
285 ; SSE42: cost of 88 {{.*}} %V8F64 = fdiv
286 ; AVX: cost of 88 {{.*}} %V8F64 = fdiv
287 ; AVX2: cost of 56 {{.*}} %V8F64 = fdiv
288 ; AVX512: cost of 2 {{.*}} %V8F64 = fdiv
289 ; SLM: cost of 276 {{.*}} %V8F64 = fdiv
290 ; GLM: cost of 260 {{.*}} %V8F64 = fdiv
291372 %V8F64 = fdiv <8 x double> undef, undef
292373
293374 ret i32 undef
294375 }
295376
296 ; CHECK-LABEL: 'frem'
297377 define i32 @frem(i32 %arg) {
298 ; SSE2: cost of 2 {{.*}} %F32 = frem
299 ; SSE42: cost of 2 {{.*}} %F32 = frem
300 ; AVX: cost of 2 {{.*}} %F32 = frem
301 ; AVX2: cost of 2 {{.*}} %F32 = frem
302 ; AVX512: cost of 2 {{.*}} %F32 = frem
303 ; SLM: cost of 2 {{.*}} %F32 = frem
304 ; GLM: cost of 2 {{.*}} %F32 = frem
378 ; SSE-LABEL: 'frem'
379 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
380 ; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
381 ; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef
382 ; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef
383 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
384 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
385 ; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef
386 ; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef
387 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
388 ;
389 ; AVX-LABEL: 'frem'
390 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
391 ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
392 ; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = frem <8 x float> undef, undef
393 ; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = frem <16 x float> undef, undef
394 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
395 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
396 ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef
397 ; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = frem <8 x double> undef, undef
398 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
399 ;
400 ; AVX512-LABEL: 'frem'
401 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
402 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
403 ; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = frem <8 x float> undef, undef
404 ; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16F32 = frem <16 x float> undef, undef
405 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
406 ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
407 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef
408 ; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef
409 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
410 ;
411 ; SLM-LABEL: 'frem'
412 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
413 ; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
414 ; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef
415 ; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef
416 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
417 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
418 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef
419 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef
420 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
421 ;
422 ; GLM-LABEL: 'frem'
423 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
424 ; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
425 ; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef
426 ; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef
427 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
428 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
429 ; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef
430 ; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef
431 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
432 ;
305433 %F32 = frem float undef, undef
306 ; SSE2: cost of 14 {{.*}} %V4F32 = frem
307 ; SSE42: cost of 14 {{.*}} %V4F32 = frem
308 ; AVX: cost of 14 {{.*}} %V4F32 = frem
309 ; AVX2: cost of 14 {{.*}} %V4F32 = frem
310 ; AVX512: cost of 14 {{.*}} %V4F32 = frem
311 ; SLM: cost of 14 {{.*}} %V4F32 = frem
312 ; GLM: cost of 14 {{.*}} %V4F32 = frem
313434 %V4F32 = frem <4 x float> undef, undef
314 ; SSE2: cost of 28 {{.*}} %V8F32 = frem
315 ; SSE42: cost of 28 {{.*}} %V8F32 = frem
316 ; AVX: cost of 30 {{.*}} %V8F32 = frem
317 ; AVX2: cost of 30 {{.*}} %V8F32 = frem
318 ; AVX512: cost of 30 {{.*}} %V8F32 = frem
319 ; SLM: cost of 28 {{.*}} %V8F32 = frem
320 ; GLM: cost of 28 {{.*}} %V8F32 = frem
321435 %V8F32 = frem <8 x float> undef, undef
322 ; SSE2: cost of 56 {{.*}} %V16F32 = frem
323 ; SSE42: cost of 56 {{.*}} %V16F32 = frem
324 ; AVX: cost of 60 {{.*}} %V16F32 = frem
325 ; AVX2: cost of 60 {{.*}} %V16F32 = frem
326 ; AVX512: cost of 62 {{.*}} %V16F32 = frem
327 ; SLM: cost of 56 {{.*}} %V16F32 = frem
328 ; GLM: cost of 56 {{.*}} %V16F32 = frem
329436 %V16F32 = frem <16 x float> undef, undef
330437
331 ; SSE2: cost of 2 {{.*}} %F64 = frem
332 ; SSE42: cost of 2 {{.*}} %F64 = frem
333 ; AVX: cost of 2 {{.*}} %F64 = frem
334 ; AVX2: cost of 2 {{.*}} %F64 = frem
335 ; AVX512: cost of 2 {{.*}} %F64 = frem
336 ; SLM: cost of 2 {{.*}} %F64 = frem
337 ; GLM: cost of 2 {{.*}} %F64 = frem
338438 %F64 = frem double undef, undef
339 ; SSE2: cost of 6 {{.*}} %V2F64 = frem
340 ; SSE42: cost of 6 {{.*}} %V2F64 = frem
341 ; AVX: cost of 6 {{.*}} %V2F64 = frem
342 ; AVX2: cost of 6 {{.*}} %V2F64 = frem
343 ; AVX512: cost of 6 {{.*}} %V2F64 = frem
344 ; SLM: cost of 6 {{.*}} %V2F64 = frem
345 ; GLM: cost of 6 {{.*}} %V2F64 = frem
346439 %V2F64 = frem <2 x double> undef, undef
347 ; SSE2: cost of 12 {{.*}} %V4F64 = frem
348 ; SSE42: cost of 12 {{.*}} %V4F64 = frem
349 ; AVX: cost of 14 {{.*}} %V4F64 = frem
350 ; AVX2: cost of 14 {{.*}} %V4F64 = frem
351 ; AVX512: cost of 14 {{.*}} %V4F64 = frem
352 ; SLM: cost of 12 {{.*}} %V4F64 = frem
353 ; GLM: cost of 12 {{.*}} %V4F64 = frem
354440 %V4F64 = frem <4 x double> undef, undef
355 ; SSE2: cost of 24 {{.*}} %V8F64 = frem
356 ; SSE42: cost of 24 {{.*}} %V8F64 = frem
357 ; AVX: cost of 28 {{.*}} %V8F64 = frem
358 ; AVX2: cost of 28 {{.*}} %V8F64 = frem
359 ; AVX512: cost of 30 {{.*}} %V8F64 = frem
360 ; SLM: cost of 24 {{.*}} %V8F64 = frem
361 ; GLM: cost of 24 {{.*}} %V8F64 = frem
362441 %V8F64 = frem <8 x double> undef, undef
363442
364443 ret i32 undef
365444 }
366445
367 ; CHECK-LABEL: 'fsqrt'
368446 define i32 @fsqrt(i32 %arg) {
369 ; SSE2: cost of 28 {{.*}} %F32 = call float @llvm.sqrt.f32
370 ; SSE42: cost of 18 {{.*}} %F32 = call float @llvm.sqrt.f32
371 ; AVX: cost of 14 {{.*}} %F32 = call float @llvm.sqrt.f32
372 ; AVX2: cost of 7 {{.*}} %F32 = call float @llvm.sqrt.f32
373 ; AVX512: cost of 7 {{.*}} %F32 = call float @llvm.sqrt.f32
374 ; SLM: cost of 20 {{.*}} %F32 = call float @llvm.sqrt.f32
375 ; GLM: cost of 19 {{.*}} %F32 = call float @llvm.sqrt.f32
447 ; SSE2-LABEL: 'fsqrt'
448 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
449 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
450 ; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
451 ; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
452 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
453 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
454 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
455 ; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
456 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
457 ;
458 ; SSE42-LABEL: 'fsqrt'
459 ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
460 ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
461 ; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
462 ; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
463 ; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
464 ; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
465 ; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
466 ; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
467 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
468 ;
469 ; AVX1-LABEL: 'fsqrt'
470 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
471 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
472 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
473 ; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
474 ; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
475 ; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
476 ; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
477 ; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
478 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
479 ;
480 ; AVX2-LABEL: 'fsqrt'
481 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
482 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
483 ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
484 ; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
485 ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
486 ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
487 ; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
488 ; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
489 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
490 ;
491 ; AVX512-LABEL: 'fsqrt'
492 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
493 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
494 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
495 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
496 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
497 ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
498 ; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
499 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
500 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
501 ;
502 ; SLM-LABEL: 'fsqrt'
503 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
504 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
505 ; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
506 ; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
507 ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
508 ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
509 ; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
510 ; SLM-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
511 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
512 ;
513 ; GLM-LABEL: 'fsqrt'
514 ; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
515 ; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
516 ; GLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
517 ; GLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
518 ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
519 ; GLM-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
520 ; GLM-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
521 ; GLM-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
522 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
523 ;
376524 %F32 = call float @llvm.sqrt.f32(float undef)
377 ; SSE2: cost of 56 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
378 ; SSE42: cost of 18 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
379 ; AVX: cost of 14 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
380 ; AVX2: cost of 7 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
381 ; AVX512: cost of 7 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
382 ; SLM: cost of 40 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
383 ; GLM: cost of 37 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
384525 %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
385 ; SSE2: cost of 112 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
386 ; SSE42: cost of 36 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
387 ; AVX: cost of 28 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
388 ; AVX2: cost of 14 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
389 ; AVX512: cost of 14 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
390 ; SLM: cost of 80 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
391 ; GLM: cost of 74 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
392526 %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
393 ; SSE2: cost of 224 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
394 ; SSE42: cost of 72 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
395 ; AVX: cost of 56 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
396 ; AVX2: cost of 28 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
397 ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
398 ; SLM: cost of 160 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
399 ; GLM: cost of 148 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
400527 %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
401528
402 ; SSE2: cost of 32 {{.*}} %F64 = call double @llvm.sqrt.f64
403 ; SSE42: cost of 32 {{.*}} %F64 = call double @llvm.sqrt.f64
404 ; AVX: cost of 21 {{.*}} %F64 = call double @llvm.sqrt.f64
405 ; AVX2: cost of 14 {{.*}} %F64 = call double @llvm.sqrt.f64
406 ; AVX512: cost of 14 {{.*}} %F64 = call double @llvm.sqrt.f64
407 ; SLM: cost of 35 {{.*}} %F64 = call double @llvm.sqrt.f64
408 ; GLM: cost of 34 {{.*}} %F64 = call double @llvm.sqrt.f64
409529 %F64 = call double @llvm.sqrt.f64(double undef)
410 ; SSE2: cost of 32 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
411 ; SSE42: cost of 32 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
412 ; AVX: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
413 ; AVX2: cost of 14 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
414 ; AVX512: cost of 14 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
415 ; SLM: cost of 70 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
416 ; GLM: cost of 67 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
417530 %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
418 ; SSE2: cost of 64 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
419 ; SSE42: cost of 64 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
420 ; AVX: cost of 43 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
421 ; AVX2: cost of 28 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
422 ; AVX512: cost of 28 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
423 ; SLM: cost of 140 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
424 ; GLM: cost of 134 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
425531 %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
426 ; SSE2: cost of 128 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
427 ; SSE42: cost of 128 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
428 ; AVX: cost of 86 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
429 ; AVX2: cost of 56 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
430 ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
431 ; SLM: cost of 280 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
432 ; GLM: cost of 268 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
433532 %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
434533
435534 ret i32 undef
436535 }
437536
438 ; CHECK-LABEL: 'fabs'
439537 define i32 @fabs(i32 %arg) {
440 ; SSE2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
441 ; SSE42: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
442 ; AVX: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
443 ; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
444 ; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
445 ; SLM: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
446 ; GLM: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
538 ; SSE-LABEL: 'fabs'
539 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
540 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
541 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
542 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
543 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
544 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
545 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
546 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
547 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
548 ;
549 ; AVX-LABEL: 'fabs'
550 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
551 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
552 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
553 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
554 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
555 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
556 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
557 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
558 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
559 ;
560 ; AVX512-LABEL: 'fabs'
561 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
562 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
563 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
564 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
565 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
566 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
567 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
568 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
569 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
570 ;
571 ; SLM-LABEL: 'fabs'
572 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
573 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
574 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
575 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
576 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
577 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
578 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
579 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
580 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
581 ;
582 ; GLM-LABEL: 'fabs'
583 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
584 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
585 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
586 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
587 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
588 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
589 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
590 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
591 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
592 ;
447593 %F32 = call float @llvm.fabs.f32(float undef)
448 ; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
449 ; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
450 ; AVX: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
451 ; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
452 ; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
453 ; SLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
454 ; GLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
455594 %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
456 ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
457 ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
458 ; AVX: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
459 ; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
460 ; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
461 ; SLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
462 ; GLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
463595 %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
464 ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
465 ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
466 ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
467 ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
468 ; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
469 ; SLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
470 ; GLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
471596 %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
472597
473 ; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
474 ; SSE42: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
475 ; AVX: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
476 ; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
477 ; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
478 ; SLM: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
479 ; GLM: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
480598 %F64 = call double @llvm.fabs.f64(double undef)
481 ; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
482 ; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
483 ; AVX: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
484 ; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
485 ; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
486 ; SLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
487 ; GLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
488599 %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
489 ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
490 ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
491 ; AVX: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
492 ; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
493 ; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
494 ; SLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
495 ; GLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
496600 %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
497 ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
498 ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
499 ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
500 ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
501 ; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
502 ; SLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
503 ; GLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
504601 %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
505602
506603 ret i32 undef
507604 }
508605
509 ; CHECK-LABEL: 'fcopysign'
510606 define i32 @fcopysign(i32 %arg) {
511 ; SSE2: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
512 ; SSE42: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
513 ; AVX: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
514 ; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
515 ; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
516 ; SLM: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
517 ; GLM: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
607 ; SSE-LABEL: 'fcopysign'
608 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
609 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
610 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
611 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
612 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
613 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
614 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
615 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
616 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
617 ;
618 ; AVX-LABEL: 'fcopysign'
619 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
620 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
621 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
622 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
623 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
624 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
625 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
626 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
627 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
628 ;
629 ; AVX512-LABEL: 'fcopysign'
630 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
631 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
632 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
633 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
634 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
635 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
636 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
637 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
638 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
639 ;
640 ; SLM-LABEL: 'fcopysign'
641 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
642 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
643 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
644 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
645 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
646 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
647 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
648 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
649 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
650 ;
651 ; GLM-LABEL: 'fcopysign'
652 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
653 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
654 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
655 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
656 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
657 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
658 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
659 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
660 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
661 ;
518662 %F32 = call float @llvm.copysign.f32(float undef, float undef)
519 ; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
520 ; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
521 ; AVX: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
522 ; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
523 ; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
524 ; SLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
525 ; GLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
526663 %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
527 ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
528 ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
529 ; AVX: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
530 ; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
531 ; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
532 ; SLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
533 ; GLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
534664 %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
535 ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
536 ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
537 ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
538 ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
539 ; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
540 ; SLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
541 ; GLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
542665 %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
543666
544 ; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
545 ; SSE42: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
546 ; AVX: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
547 ; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
548 ; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
549 ; SLM: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
550 ; GLM: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
551667 %F64 = call double @llvm.copysign.f64(double undef, double undef)
552 ; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
553 ; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
554 ; AVX: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
555 ; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
556 ; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
557 ; SLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
558 ; GLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
559668 %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
560 ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
561 ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
562 ; AVX: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
563 ; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
564 ; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
565 ; SLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
566 ; GLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
567669 %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
568 ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
569 ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
570 ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
571 ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
572 ; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
573 ; SLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
574 ; GLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
575670 %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
576671
577672 ret i32 undef
578673 }
579674
580 ; CHECK-LABEL: 'fma'
581675 define i32 @fma(i32 %arg) {
582 ; SSE2: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
583 ; SSE42: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
584 ; AVX: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
585 ; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
586 ; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
587 ; SLM: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
588 ; GLM: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
676 ; SSE-LABEL: 'fma'
677 ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
678 ; SSE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
679 ; SSE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
680 ; SSE-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
681 ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
682 ; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
683 ; SSE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
684 ; SSE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
685 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
686 ;
687 ; AVX-LABEL: 'fma'
688 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
689 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
690 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
691 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
692 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
693 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
694 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
695 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
696 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
697 ;
698 ; AVX512-LABEL: 'fma'
699 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
700 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
701 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
702 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
703 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
704 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
705 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
706 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
707 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
708 ;
709 ; SLM-LABEL: 'fma'
710 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
711 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
712 ; SLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
713 ; SLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
714 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
715 ; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
716 ; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
717 ; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
718 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
719 ;
720 ; GLM-LABEL: 'fma'
721 ; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
722 ; GLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
723 ; GLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
724 ; GLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
725 ; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
726 ; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
727 ; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
728 ; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
729 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
730 ;
589731 %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
590 ; SSE2: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
591 ; SSE42: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
592 ; AVX: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
593 ; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
594 ; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
595 ; SLM: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
596 ; GLM: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
597732 %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
598 ; SSE2: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
599 ; SSE42: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
600 ; AVX: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
601 ; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
602 ; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
603 ; SLM: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
604 ; GLM: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
605733 %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
606 ; SSE2: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
607 ; SSE42: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
608 ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
609 ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
610 ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
611 ; SLM: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
612 ; GLM: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
613734 %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
614735
615 ; SSE2: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
616 ; SSE42: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
617 ; AVX: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
618 ; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
619 ; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
620 ; SLM: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
621 ; GLM: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
622736 %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
623 ; SSE2: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
624 ; SSE42: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
625 ; AVX: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
626 ; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
627 ; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
628 ; SLM: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
629 ; GLM: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
630737 %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
631 ; SSE2: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
632 ; SSE42: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
633 ; AVX: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
634 ; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
635 ; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
636 ; SLM: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
637 ; GLM: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
638738 %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
639 ; SSE2: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
640 ; SSE42: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
641 ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
642 ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
643 ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
644 ; SLM: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
645 ; GLM: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
646739 %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
647740
648741 ret i32 undef
None ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
1 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
4 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
5 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
6 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
7 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
8 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GLM
0 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
1 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
3 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
4 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
5 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
6 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
7 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
8 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
9 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
910
1011 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
1112 target triple = "x86_64-apple-macosx10.8.0"
1213
13 ; CHECK-LABEL: 'add'
1414 define i32 @add(i32 %arg) {
15 ; CHECK: cost of 1 {{.*}} %I64 = add
15 ; SSE-LABEL: 'add'
16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
17 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef
18 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = add <4 x i64> undef, undef
19 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = add <8 x i64> undef, undef
20 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
21 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
22 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = add <8 x i32> undef, undef
23 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = add <16 x i32> undef, undef
24 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
25 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
26 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = add <16 x i16> undef, undef
27 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = add <32 x i16> undef, undef
28 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
29 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
30 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = add <32 x i8> undef, undef
31 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = add <64 x i8> undef, undef
32 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
33 ;
34 ; AVX1-LABEL: 'add'
35 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
36 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef
37 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = add <4 x i64> undef, undef
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = add <8 x i64> undef, undef
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
41 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = add <8 x i32> undef, undef
42 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = add <16 x i32> undef, undef
43 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
44 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
45 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = add <16 x i16> undef, undef
46 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = add <32 x i16> undef, undef
47 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
48 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
49 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = add <32 x i8> undef, undef
50 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = add <64 x i8> undef, undef
51 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
52 ;
53 ; AVX2-LABEL: 'add'
54 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
55 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef
56 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef
57 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = add <8 x i64> undef, undef
58 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
59 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
60 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef
61 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = add <16 x i32> undef, undef
62 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
63 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef
65 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = add <32 x i16> undef, undef
66 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
67 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
68 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef
69 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = add <64 x i8> undef, undef
70 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
71 ;
72 ; AVX512F-LABEL: 'add'
73 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
74 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef
75 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef
76 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef
77 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
78 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
79 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef
80 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef
81 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
82 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
83 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef
84 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = add <32 x i16> undef, undef
85 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
86 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
87 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef
88 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = add <64 x i8> undef, undef
89 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
90 ;
91 ; AVX512BW-LABEL: 'add'
92 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
93 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef
94 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef
95 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef
96 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
97 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
98 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef
99 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef
100 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
101 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
102 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef
103 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = add <32 x i16> undef, undef
104 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
105 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
106 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef
107 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = add <64 x i8> undef, undef
108 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
109 ;
110 ; AVX512DQ-LABEL: 'add'
111 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
112 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef
113 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = add <4 x i64> undef, undef
114 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = add <8 x i64> undef, undef
115 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
116 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
117 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = add <8 x i32> undef, undef
118 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = add <16 x i32> undef, undef
119 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
120 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
121 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = add <16 x i16> undef, undef
122 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = add <32 x i16> undef, undef
123 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
124 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
125 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = add <32 x i8> undef, undef
126 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = add <64 x i8> undef, undef
127 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
128 ;
129 ; SLM-LABEL: 'add'
130 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
131 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = add <2 x i64> undef, undef
132 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = add <4 x i64> undef, undef
133 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = add <8 x i64> undef, undef
134 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
135 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
136 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = add <8 x i32> undef, undef
137 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = add <16 x i32> undef, undef
138 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
139 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
140 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = add <16 x i16> undef, undef
141 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = add <32 x i16> undef, undef
142 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
143 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
144 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = add <32 x i8> undef, undef
145 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = add <64 x i8> undef, undef
146 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
147 ;
148 ; GLM-LABEL: 'add'
149 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
150 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef
151 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = add <4 x i64> undef, undef
152 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = add <8 x i64> undef, undef
153 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef
154 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef
155 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = add <8 x i32> undef, undef
156 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = add <16 x i32> undef, undef
157 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef
158 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef
159 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = add <16 x i16> undef, undef
160 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = add <32 x i16> undef, undef
161 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef
162 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef
163 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = add <32 x i8> undef, undef
164 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = add <64 x i8> undef, undef
165 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
166 ;
16167 %I64 = add i64 undef, undef
17 ; SSSE3: cost of 1 {{.*}} %V2I64 = add
18 ; SSE42: cost of 1 {{.*}} %V2I64 = add
19 ; SLM: cost of 4 {{.*}} %V2I64 = add
20 ; GLM: cost of 1 {{.*}} %V2I64 = add
21 ; AVX: cost of 1 {{.*}} %V2I64 = add
22 ; AVX2: cost of 1 {{.*}} %V2I64 = add
23 ; AVX512: cost of 1 {{.*}} %V2I64 = add
24168 %V2I64 = add <2 x i64> undef, undef
25 ; SSSE3: cost of 2 {{.*}} %V4I64 = add
26 ; SSE42: cost of 2 {{.*}} %V4I64 = add
27 ; SLM: cost of 8 {{.*}} %V4I64 = add
28 ; GLM: cost of 2 {{.*}} %V4I64 = add
29 ; AVX: cost of 4 {{.*}} %V4I64 = add
30 ; AVX2: cost of 1 {{.*}} %V4I64 = add
31 ; AVX512: cost of 1 {{.*}} %V4I64 = add
32169 %V4I64 = add <4 x i64> undef, undef
33 ; SSSE3: cost of 4 {{.*}} %V8I64 = add
34 ; SSE42: cost of 4 {{.*}} %V8I64 = add
35 ; SLM: cost of 16 {{.*}} %V8I64 = add
36 ; GLM: cost of 4 {{.*}} %V8I64 = add
37 ; AVX: cost of 8 {{.*}} %V8I64 = add
38 ; AVX2: cost of 2 {{.*}} %V8I64 = add
39 ; AVX512: cost of 1 {{.*}} %V8I64 = add
40170 %V8I64 = add <8 x i64> undef, undef
41171
42 ; CHECK: cost of 1 {{.*}} %I32 = add
43172 %I32 = add i32 undef, undef
44 ; SSSE3: cost of 1 {{.*}} %V4I32 = add
45 ; SSE42: cost of 1 {{.*}} %V4I32 = add
46 ; SLM: cost of 1 {{.*}} %V4I32 = add
47 ; GLM: cost of 1 {{.*}} %V4I32 = add
48 ; AVX: cost of 1 {{.*}} %V4I32 = add
49 ; AVX2: cost of 1 {{.*}} %V4I32 = add
50 ; AVX512: cost of 1 {{.*}} %V4I32 = add
51173 %V4I32 = add <4 x i32> undef, undef
52 ; SSSE3: cost of 2 {{.*}} %V8I32 = add
53 ; SSE42: cost of 2 {{.*}} %V8I32 = add
54 ; SLM: cost of 2 {{.*}} %V8I32 = add
55 ; GLM: cost of 2 {{.*}} %V8I32 = add
56 ; AVX: cost of 4 {{.*}} %V8I32 = add
57 ; AVX2: cost of 1 {{.*}} %V8I32 = add
58 ; AVX512: cost of 1 {{.*}} %V8I32 = add
59174 %V8I32 = add <8 x i32> undef, undef
60 ; SSSE3: cost of 4 {{.*}} %V16I32 = add
61 ; SSE42: cost of 4 {{.*}} %V16I32 = add
62 ; SLM: cost of 4 {{.*}} %V16I32 = add
63 ; GLM: cost of 4 {{.*}} %V16I32 = add
64 ; AVX: cost of 8 {{.*}} %V16I32 = add
65 ; AVX2: cost of 2 {{.*}} %V16I32 = add
66 ; AVX512: cost of 1 {{.*}} %V16I32 = add
67175 %V16I32 = add <16 x i32> undef, undef
68176
69 ; CHECK: cost of 1 {{.*}} %I16 = add
70177 %I16 = add i16 undef, undef
71 ; SSSE3: cost of 1 {{.*}} %V8I16 = add
72 ; SSE42: cost of 1 {{.*}} %V8I16 = add
73 ; SLM: cost of 1 {{.*}} %V8I16 = add
74 ; GLM: cost of 1 {{.*}} %V8I16 = add
75 ; AVX: cost of 1 {{.*}} %V8I16 = add
76 ; AVX2: cost of 1 {{.*}} %V8I16 = add
77 ; AVX512: cost of 1 {{.*}} %V8I16 = add
78178 %V8I16 = add <8 x i16> undef, undef
79 ; SSSE3: cost of 2 {{.*}} %V16I16 = add
80 ; SSE42: cost of 2 {{.*}} %V16I16 = add
81 ; SLM: cost of 2 {{.*}} %V16I16 = add
82 ; GLM: cost of 2 {{.*}} %V16I16 = add
83 ; AVX: cost of 4 {{.*}} %V16I16 = add
84 ; AVX2: cost of 1 {{.*}} %V16I16 = add
85 ; AVX512: cost of 1 {{.*}} %V16I16 = add
86179 %V16I16 = add <16 x i16> undef, undef
87 ; SSSE3: cost of 4 {{.*}} %V32I16 = add
88 ; SSE42: cost of 4 {{.*}} %V32I16 = add
89 ; SLM: cost of 4 {{.*}} %V32I16 = add
90 ; GLM: cost of 4 {{.*}} %V32I16 = add
91 ; AVX: cost of 8 {{.*}} %V32I16 = add
92 ; AVX2: cost of 2 {{.*}} %V32I16 = add
93 ; AVX512F: cost of 2 {{.*}} %V32I16 = add
94 ; AVX512BW: cost of 1 {{.*}} %V32I16 = add
95180 %V32I16 = add <32 x i16> undef, undef
96181
97 ; CHECK: cost of 1 {{.*}} %I8 = add
98182 %I8 = add i8 undef, undef
99 ; SSSE3: cost of 1 {{.*}} %V16I8 = add
100 ; SSE42: cost of 1 {{.*}} %V16I8 = add
101 ; SLM: cost of 1 {{.*}} %V16I8 = add
102 ; GLM: cost of 1 {{.*}} %V16I8 = add
103 ; AVX: cost of 1 {{.*}} %V16I8 = add
104 ; AVX2: cost of 1 {{.*}} %V16I8 = add
105 ; AVX512: cost of 1 {{.*}} %V16I8 = add
106183 %V16I8 = add <16 x i8> undef, undef
107 ; SSSE3: cost of 2 {{.*}} %V32I8 = add
108 ; SSE42: cost of 2 {{.*}} %V32I8 = add
109 ; SLM: cost of 2 {{.*}} %V32I8 = add
110 ; GLM: cost of 2 {{.*}} %V32I8 = add
111 ; AVX: cost of 4 {{.*}} %V32I8 = add
112 ; AVX2: cost of 1 {{.*}} %V32I8 = add
113 ; AVX512: cost of 1 {{.*}} %V32I8 = add
114184 %V32I8 = add <32 x i8> undef, undef
115 ; SSSE3: cost of 4 {{.*}} %V64I8 = add
116 ; SSE42: cost of 4 {{.*}} %V64I8 = add
117 ; SLM: cost of 4 {{.*}} %V64I8 = add
118 ; GLM: cost of 4 {{.*}} %V64I8 = add
119 ; AVX: cost of 8 {{.*}} %V64I8 = add
120 ; AVX2: cost of 2 {{.*}} %V64I8 = add
121 ; AVX512F: cost of 2 {{.*}} %V64I8 = add
122 ; AVX512BW: cost of 1 {{.*}} %V64I8 = add
123185 %V64I8 = add <64 x i8> undef, undef
124186
125187 ret i32 undef
126188 }
127189
128 ; CHECK-LABEL: 'sub'
129190 define i32 @sub(i32 %arg) {
130 ; CHECK: cost of 1 {{.*}} %I64 = sub
191 ; SSE-LABEL: 'sub'
192 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
193 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef
194 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = sub <4 x i64> undef, undef
195 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = sub <8 x i64> undef, undef
196 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
197 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
198 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = sub <8 x i32> undef, undef
199 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = sub <16 x i32> undef, undef
200 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
201 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
202 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = sub <16 x i16> undef, undef
203 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = sub <32 x i16> undef, undef
204 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
205 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
206 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = sub <32 x i8> undef, undef
207 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = sub <64 x i8> undef, undef
208 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
209 ;
210 ; AVX1-LABEL: 'sub'
211 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
212 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef
213 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = sub <4 x i64> undef, undef
214 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = sub <8 x i64> undef, undef
215 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
216 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
217 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = sub <8 x i32> undef, undef
218 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = sub <16 x i32> undef, undef
219 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
220 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
221 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = sub <16 x i16> undef, undef
222 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = sub <32 x i16> undef, undef
223 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
224 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
225 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = sub <32 x i8> undef, undef
226 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = sub <64 x i8> undef, undef
227 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
228 ;
229 ; AVX2-LABEL: 'sub'
230 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
231 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef
232 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef
233 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = sub <8 x i64> undef, undef
234 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
235 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
236 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef
237 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = sub <16 x i32> undef, undef
238 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
239 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
240 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef
241 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = sub <32 x i16> undef, undef
242 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
243 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
244 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef
245 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = sub <64 x i8> undef, undef
246 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
247 ;
248 ; AVX512F-LABEL: 'sub'
249 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
250 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef
251 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef
252 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef
253 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
254 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
255 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef
256 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef
257 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef
260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = sub <32 x i16> undef, undef
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef
264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = sub <64 x i8> undef, undef
265 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
266 ;
267 ; AVX512BW-LABEL: 'sub'
268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef
270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef
272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef
275 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef
276 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
277 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
278 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef
279 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = sub <32 x i16> undef, undef
280 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
281 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
282 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef
283 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = sub <64 x i8> undef, undef
284 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
285 ;
286 ; AVX512DQ-LABEL: 'sub'
287 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
288 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef
289 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sub <4 x i64> undef, undef
290 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = sub <8 x i64> undef, undef
291 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
292 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
293 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sub <8 x i32> undef, undef
294 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = sub <16 x i32> undef, undef
295 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
296 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
297 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sub <16 x i16> undef, undef
298 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = sub <32 x i16> undef, undef
299 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
300 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
301 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = sub <32 x i8> undef, undef
302 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = sub <64 x i8> undef, undef
303 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
304 ;
305 ; SLM-LABEL: 'sub'
306 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
307 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = sub <2 x i64> undef, undef
308 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = sub <4 x i64> undef, undef
309 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = sub <8 x i64> undef, undef
310 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
311 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
312 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = sub <8 x i32> undef, undef
313 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = sub <16 x i32> undef, undef
314 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
315 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
316 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = sub <16 x i16> undef, undef
317 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = sub <32 x i16> undef, undef
318 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
319 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
320 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = sub <32 x i8> undef, undef
321 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = sub <64 x i8> undef, undef
322 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
323 ;
324 ; GLM-LABEL: 'sub'
325 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef
326 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef
327 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = sub <4 x i64> undef, undef
328 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = sub <8 x i64> undef, undef
329 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef
330 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef
331 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = sub <8 x i32> undef, undef
332 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = sub <16 x i32> undef, undef
333 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef
334 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef
335 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = sub <16 x i16> undef, undef
336 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = sub <32 x i16> undef, undef
337 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef
338 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef
339 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = sub <32 x i8> undef, undef
340 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = sub <64 x i8> undef, undef
341 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
342 ;
131343 %I64 = sub i64 undef, undef
132 ; SSSE3: cost of 1 {{.*}} %V2I64 = sub
133 ; SSE42: cost of 1 {{.*}} %V2I64 = sub
134 ; SLM: cost of 4 {{.*}} %V2I64 = sub
135 ; GLM: cost of 1 {{.*}} %V2I64 = sub
136 ; AVX: cost of 1 {{.*}} %V2I64 = sub
137 ; AVX2: cost of 1 {{.*}} %V2I64 = sub
138 ; AVX512: cost of 1 {{.*}} %V2I64 = sub
139344 %V2I64 = sub <2 x i64> undef, undef
140 ; SSSE3: cost of 2 {{.*}} %V4I64 = sub
141 ; SSE42: cost of 2 {{.*}} %V4I64 = sub
142 ; SLM: cost of 8 {{.*}} %V4I64 = sub
143 ; GLM: cost of 2 {{.*}} %V4I64 = sub
144 ; AVX: cost of 4 {{.*}} %V4I64 = sub
145 ; AVX2: cost of 1 {{.*}} %V4I64 = sub
146 ; AVX512: cost of 1 {{.*}} %V4I64 = sub
147345 %V4I64 = sub <4 x i64> undef, undef
148 ; SSSE3: cost of 4 {{.*}} %V8I64 = sub
149 ; SSE42: cost of 4 {{.*}} %V8I64 = sub
150 ; SLM: cost of 16 {{.*}} %V8I64 = sub
151 ; GLM: cost of 4 {{.*}} %V8I64 = sub
152 ; AVX: cost of 8 {{.*}} %V8I64 = sub
153 ; AVX2: cost of 2 {{.*}} %V8I64 = sub
154 ; AVX512: cost of 1 {{.*}} %V8I64 = sub
155346 %V8I64 = sub <8 x i64> undef, undef
156347
157 ; CHECK: cost of 1 {{.*}} %I32 = sub
158348 %I32 = sub i32 undef, undef
159 ; SSSE3: cost of 1 {{.*}} %V4I32 = sub
160 ; SSE42: cost of 1 {{.*}} %V4I32 = sub
161 ; SLM: cost of 1 {{.*}} %V4I32 = sub
162 ; GLM: cost of 1 {{.*}} %V4I32 = sub
163 ; AVX: cost of 1 {{.*}} %V4I32 = sub
164 ; AVX2: cost of 1 {{.*}} %V4I32 = sub
165 ; AVX512: cost of 1 {{.*}} %V4I32 = sub
166349 %V4I32 = sub <4 x i32> undef, undef
167 ; SSSE3: cost of 2 {{.*}} %V8I32 = sub
168 ; SSE42: cost of 2 {{.*}} %V8I32 = sub
169 ; SLM: cost of 2 {{.*}} %V8I32 = sub
170 ; GLM: cost of 2 {{.*}} %V8I32 = sub
171 ; AVX: cost of 4 {{.*}} %V8I32 = sub
172 ; AVX2: cost of 1 {{.*}} %V8I32 = sub
173 ; AVX512: cost of 1 {{.*}} %V8I32 = sub
174350 %V8I32 = sub <8 x i32> undef, undef
175 ; SSSE3: cost of 4 {{.*}} %V16I32 = sub
176 ; SSE42: cost of 4 {{.*}} %V16I32 = sub
177 ; SLM: cost of 4 {{.*}} %V16I32 = sub
178 ; GLM: cost of 4 {{.*}} %V16I32 = sub
179 ; AVX: cost of 8 {{.*}} %V16I32 = sub
180 ; AVX2: cost of 2 {{.*}} %V16I32 = sub
181 ; AVX512: cost of 1 {{.*}} %V16I32 = sub
182351 %V16I32 = sub <16 x i32> undef, undef
183352
184 ; CHECK: cost of 1 {{.*}} %I16 = sub
185353 %I16 = sub i16 undef, undef
186 ; SSSE3: cost of 1 {{.*}} %V8I16 = sub
187 ; SSE42: cost of 1 {{.*}} %V8I16 = sub
188 ; SLM: cost of 1 {{.*}} %V8I16 = sub
189 ; GLM: cost of 1 {{.*}} %V8I16 = sub
190 ; AVX: cost of 1 {{.*}} %V8I16 = sub
191 ; AVX2: cost of 1 {{.*}} %V8I16 = sub
192 ; AVX512: cost of 1 {{.*}} %V8I16 = sub
193354 %V8I16 = sub <8 x i16> undef, undef
194 ; SSSE3: cost of 2 {{.*}} %V16I16 = sub
195 ; SSE42: cost of 2 {{.*}} %V16I16 = sub
196 ; SLM: cost of 2 {{.*}} %V16I16 = sub
197 ; GLM: cost of 2 {{.*}} %V16I16 = sub
198 ; AVX: cost of 4 {{.*}} %V16I16 = sub
199 ; AVX2: cost of 1 {{.*}} %V16I16 = sub
200 ; AVX512: cost of 1 {{.*}} %V16I16 = sub
201355 %V16I16 = sub <16 x i16> undef, undef
202 ; SSSE3: cost of 4 {{.*}} %V32I16 = sub
203 ; SSE42: cost of 4 {{.*}} %V32I16 = sub
204 ; SLM: cost of 4 {{.*}} %V32I16 = sub
205 ; GLM: cost of 4 {{.*}} %V32I16 = sub
206 ; AVX: cost of 8 {{.*}} %V32I16 = sub
207 ; AVX2: cost of 2 {{.*}} %V32I16 = sub
208 ; AVX512F: cost of 2 {{.*}} %V32I16 = sub
209 ; AVX512BW: cost of 1 {{.*}} %V32I16 = sub
210356 %V32I16 = sub <32 x i16> undef, undef
211357
212 ; CHECK: cost of 1 {{.*}} %I8 = sub
213358 %I8 = sub i8 undef, undef
214 ; SSSE3: cost of 1 {{.*}} %V16I8 = sub
215 ; SSE42: cost of 1 {{.*}} %V16I8 = sub
216 ; SLM: cost of 1 {{.*}} %V16I8 = sub
217 ; GLM: cost of 1 {{.*}} %V16I8 = sub
218 ; AVX: cost of 1 {{.*}} %V16I8 = sub
219 ; AVX2: cost of 1 {{.*}} %V16I8 = sub
220 ; AVX512: cost of 1 {{.*}} %V16I8 = sub
221359 %V16I8 = sub <16 x i8> undef, undef
222 ; SSSE3: cost of 2 {{.*}} %V32I8 = sub
223 ; SSE42: cost of 2 {{.*}} %V32I8 = sub
224 ; SLM: cost of 2 {{.*}} %V32I8 = sub
225 ; GLM: cost of 2 {{.*}} %V32I8 = sub
226 ; AVX: cost of 4 {{.*}} %V32I8 = sub
227 ; AVX2: cost of 1 {{.*}} %V32I8 = sub
228 ; AVX512: cost of 1 {{.*}} %V32I8 = sub
229360 %V32I8 = sub <32 x i8> undef, undef
230 ; SSSE3: cost of 4 {{.*}} %V64I8 = sub
231 ; SSE42: cost of 4 {{.*}} %V64I8 = sub
232 ; SLM: cost of 4 {{.*}} %V64I8 = sub
233 ; GLM: cost of 4 {{.*}} %V64I8 = sub
234 ; AVX: cost of 8 {{.*}} %V64I8 = sub
235 ; AVX2: cost of 2 {{.*}} %V64I8 = sub
236 ; AVX512F: cost of 2 {{.*}} %V64I8 = sub
237 ; AVX512BW: cost of 1 {{.*}} %V64I8 = sub
238361 %V64I8 = sub <64 x i8> undef, undef
239362
240363 ret i32 undef
241364 }
242365
243 ; CHECK-LABEL: 'or'
244366 define i32 @or(i32 %arg) {
245 ; CHECK: cost of 1 {{.*}} %I64 = or
367 ; SSE-LABEL: 'or'
368 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
369 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
370 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = or <4 x i64> undef, undef
371 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = or <8 x i64> undef, undef
372 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
373 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
374 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = or <8 x i32> undef, undef
375 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = or <16 x i32> undef, undef
376 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
377 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
378 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = or <16 x i16> undef, undef
379 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = or <32 x i16> undef, undef
380 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
381 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
382 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
383 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
384 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
385 ;
386 ; AVX-LABEL: 'or'
387 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
388 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
389 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
390 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = or <8 x i64> undef, undef
391 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
392 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
393 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
394 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = or <16 x i32> undef, undef
395 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
396 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
397 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
398 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = or <32 x i16> undef, undef
399 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
400 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
401 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
402 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
403 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
404 ;
405 ; AVX512F-LABEL: 'or'
406 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
407 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
408 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
409 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef
410 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
411 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
412 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
413 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef
414 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
415 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
416 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
417 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = or <32 x i16> undef, undef
418 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
419 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
420 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
421 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
422 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
423 ;
424 ; AVX512BW-LABEL: 'or'
425 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
426 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
427 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
428 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef
429 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
430 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
431 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
432 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef
433 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
434 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
435 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
436 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef
437 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
438 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
439 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
440 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef
441 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
442 ;
443 ; AVX512DQ-LABEL: 'or'
444 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
445 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
446 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
447 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef
448 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
449 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
450 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
451 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef
452 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
453 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
454 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
455 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = or <32 x i16> undef, undef
456 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
457 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
458 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
459 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
460 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
461 ;
462 ; SLM-LABEL: 'or'
463 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
464 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
465 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = or <4 x i64> undef, undef
466 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = or <8 x i64> undef, undef
467 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
468 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
469 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = or <8 x i32> undef, undef
470 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = or <16 x i32> undef, undef
471 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
472 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
473 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = or <16 x i16> undef, undef
474 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = or <32 x i16> undef, undef
475 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
476 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
477 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
478 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
479 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
480 ;
481 ; GLM-LABEL: 'or'
482 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
483 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
484 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = or <4 x i64> undef, undef
485 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = or <8 x i64> undef, undef
486 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
487 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
488 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = or <8 x i32> undef, undef
489 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = or <16 x i32> undef, undef
490 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
491 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
492 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = or <16 x i16> undef, undef
493 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = or <32 x i16> undef, undef
494 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
495 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
496 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
497 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
498 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
499 ;
246500 %I64 = or i64 undef, undef
247 ; SSSE3: cost of 1 {{.*}} %V2I64 = or
248 ; SSE42: cost of 1 {{.*}} %V2I64 = or
249 ; SLM: cost of 1 {{.*}} %V2I64 = or
250 ; GLM: cost of 1 {{.*}} %V2I64 = or
251 ; AVX: cost of 1 {{.*}} %V2I64 = or
252 ; AVX2: cost of 1 {{.*}} %V2I64 = or
253 ; AVX512: cost of 1 {{.*}} %V2I64 = or
254501 %V2I64 = or <2 x i64> undef, undef
255 ; SSSE3: cost of 2 {{.*}} %V4I64 = or
256 ; SSE42: cost of 2 {{.*}} %V4I64 = or
257 ; SLM: cost of 2 {{.*}} %V4I64 = or
258 ; GLM: cost of 2 {{.*}} %V4I64 = or
259 ; AVX: cost of 1 {{.*}} %V4I64 = or
260 ; AVX2: cost of 1 {{.*}} %V4I64 = or
261 ; AVX512: cost of 1 {{.*}} %V4I64 = or
262502 %V4I64 = or <4 x i64> undef, undef
263 ; SSSE3: cost of 4 {{.*}} %V8I64 = or
264 ; SSE42: cost of 4 {{.*}} %V8I64 = or
265 ; SLM: cost of 4 {{.*}} %V8I64 = or
266 ; GLM: cost of 4 {{.*}} %V8I64 = or
267 ; AVX: cost of 2 {{.*}} %V8I64 = or
268 ; AVX2: cost of 2 {{.*}} %V8I64 = or
269 ; AVX512: cost of 1 {{.*}} %V8I64 = or
270503 %V8I64 = or <8 x i64> undef, undef
271504
272 ; CHECK: cost of 1 {{.*}} %I32 = or
273505 %I32 = or i32 undef, undef
274 ; SSSE3: cost of 1 {{.*}} %V4I32 = or
275 ; SSE42: cost of 1 {{.*}} %V4I32 = or
276 ; SLM: cost of 1 {{.*}} %V4I32 = or
277 ; GLM: cost of 1 {{.*}} %V4I32 = or
278 ; AVX: cost of 1 {{.*}} %V4I32 = or
279 ; AVX2: cost of 1 {{.*}} %V4I32 = or
280 ; AVX512: cost of 1 {{.*}} %V4I32 = or
281506 %V4I32 = or <4 x i32> undef, undef
282 ; SSSE3: cost of 2 {{.*}} %V8I32 = or
283 ; SSE42: cost of 2 {{.*}} %V8I32 = or
284 ; SLM: cost of 2 {{.*}} %V8I32 = or
285 ; GLM: cost of 2 {{.*}} %V8I32 = or
286 ; AVX: cost of 1 {{.*}} %V8I32 = or
287 ; AVX2: cost of 1 {{.*}} %V8I32 = or
288 ; AVX512: cost of 1 {{.*}} %V8I32 = or
289507 %V8I32 = or <8 x i32> undef, undef
290 ; SSSE3: cost of 4 {{.*}} %V16I32 = or
291 ; SSE42: cost of 4 {{.*}} %V16I32 = or
292 ; SLM: cost of 4 {{.*}} %V16I32 = or
293 ; GLM: cost of 4 {{.*}} %V16I32 = or
294 ; AVX: cost of 2 {{.*}} %V16I32 = or
295 ; AVX2: cost of 2 {{.*}} %V16I32 = or
296 ; AVX512: cost of 1 {{.*}} %V16I32 = or
297508 %V16I32 = or <16 x i32> undef, undef
298509
299 ; CHECK: cost of 1 {{.*}} %I16 = or
300510 %I16 = or i16 undef, undef
301 ; SSSE3: cost of 1 {{.*}} %V8I16 = or
302 ; SSE42: cost of 1 {{.*}} %V8I16 = or
303 ; SLM: cost of 1 {{.*}} %V8I16 = or
304 ; GLM: cost of 1 {{.*}} %V8I16 = or
305 ; AVX: cost of 1 {{.*}} %V8I16 = or
306 ; AVX2: cost of 1 {{.*}} %V8I16 = or
307 ; AVX512: cost of 1 {{.*}} %V8I16 = or
308511 %V8I16 = or <8 x i16> undef, undef
309 ; SSSE3: cost of 2 {{.*}} %V16I16 = or
310 ; SSE42: cost of 2 {{.*}} %V16I16 = or
311 ; SLM: cost of 2 {{.*}} %V16I16 = or
312 ; GLM: cost of 2 {{.*}} %V16I16 = or
313 ; AVX: cost of 1 {{.*}} %V16I16 = or
314 ; AVX2: cost of 1 {{.*}} %V16I16 = or
315 ; AVX512: cost of 1 {{.*}} %V16I16 = or
316512 %V16I16 = or <16 x i16> undef, undef
317 ; SSSE3: cost of 4 {{.*}} %V32I16 = or
318 ; SSE42: cost of 4 {{.*}} %V32I16 = or
319 ; SLM: cost of 4 {{.*}} %V32I16 = or
320 ; GLM: cost of 4 {{.*}} %V32I16 = or
321 ; AVX: cost of 2 {{.*}} %V32I16 = or
322 ; AVX2: cost of 2 {{.*}} %V32I16 = or
323 ; AVX512F: cost of 2 {{.*}} %V32I16 = or
324 ; AVX512BW: cost of 1 {{.*}} %V32I16 = or
325513 %V32I16 = or <32 x i16> undef, undef
326514
327 ; CHECK: cost of 1 {{.*}} %I8 = or
328515 %I8 = or i8 undef, undef
329 ; SSSE3: cost of 1 {{.*}} %V16I8 = or
330 ; SSE42: cost of 1 {{.*}} %V16I8 = or
331 ; SLM: cost of 1 {{.*}} %V16I8 = or
332 ; GLM: cost of 1 {{.*}} %V16I8 = or
333 ; AVX: cost of 1 {{.*}} %V16I8 = or
334 ; AVX2: cost of 1 {{.*}} %V16I8 = or
335 ; AVX512: cost of 1 {{.*}} %V16I8 = or
336516 %V16I8 = or <16 x i8> undef, undef
337 ; SSSE3: cost of 2 {{.*}} %V32I8 = or
338 ; SSE42: cost of 2 {{.*}} %V32I8 = or
339 ; SLM: cost of 2 {{.*}} %V32I8 = or
340 ; GLM: cost of 2 {{.*}} %V32I8 = or
341 ; AVX: cost of 1 {{.*}} %V32I8 = or
342 ; AVX2: cost of 1 {{.*}} %V32I8 = or
343 ; AVX512: cost of 1 {{.*}} %V32I8 = or
344517 %V32I8 = or <32 x i8> undef, undef
345 ; SSSE3: cost of 4 {{.*}} %V64I8 = or
346 ; SSE42: cost of 4 {{.*}} %V64I8 = or
347 ; SLM: cost of 4 {{.*}} %V64I8 = or
348 ; GLM: cost of 4 {{.*}} %V64I8 = or
349 ; AVX: cost of 2 {{.*}} %V64I8 = or
350 ; AVX2: cost of 2 {{.*}} %V64I8 = or
351 ; AVX512F: cost of 2 {{.*}} %V64I8 = or
352 ; AVX512BW: cost of 1 {{.*}} %V64I8 = or
353518 %V64I8 = or <64 x i8> undef, undef
354519
355520 ret i32 undef
356521 }
357522
358 ; CHECK-LABEL: 'xor'
359523 define i32 @xor(i32 %arg) {
360 ; CHECK: cost of 1 {{.*}} %I64 = xor
524 ; SSE-LABEL: 'xor'
525 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
526 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
527 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = xor <4 x i64> undef, undef
528 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = xor <8 x i64> undef, undef
529 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
530 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
531 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = xor <8 x i32> undef, undef
532 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = xor <16 x i32> undef, undef
533 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
534 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
535 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = xor <16 x i16> undef, undef
536 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = xor <32 x i16> undef, undef
537 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
538 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
539 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
540 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
541 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
542 ;
543 ; AVX-LABEL: 'xor'
544 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
545 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
546 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
547 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = xor <8 x i64> undef, undef
548 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
549 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
550 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
551 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = xor <16 x i32> undef, undef
552 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
553 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
554 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
555 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = xor <32 x i16> undef, undef
556 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
557 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
558 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
559 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
560 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
561 ;
562 ; AVX512F-LABEL: 'xor'
563 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
564 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
565 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
566 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef
567 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
568 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
569 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
570 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef
571 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
572 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
573 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
574 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = xor <32 x i16> undef, undef
575 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
576 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
577 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
578 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
579 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
580 ;
581 ; AVX512BW-LABEL: 'xor'
582 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
583 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
584 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
585 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef
586 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
587 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
588 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
589 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef
590 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
591 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
592 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
593 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef
594 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
595 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
596 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
597 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef
598 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
599 ;
600 ; AVX512DQ-LABEL: 'xor'
601 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
602 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
603 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
604 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef
605 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
606 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
607 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
608 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef
609 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
610 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
611 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
612 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = xor <32 x i16> undef, undef
613 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
614 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
615 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
616 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
617 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
618 ;
619 ; SLM-LABEL: 'xor'
620 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
621 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
622 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = xor <4 x i64> undef, undef
623 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = xor <8 x i64> undef, undef
624 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
625 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
626 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = xor <8 x i32> undef, undef
627 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = xor <16 x i32> undef, undef
628 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
629 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
630 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = xor <16 x i16> undef, undef
631 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = xor <32 x i16> undef, undef
632 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
633 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
634 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
635 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
636 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
637 ;
638 ; GLM-LABEL: 'xor'
639 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
640 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
641 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = xor <4 x i64> undef, undef
642 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = xor <8 x i64> undef, undef
643 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
644 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
645 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = xor <8 x i32> undef, undef
646 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = xor <16 x i32> undef, undef
647 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
648 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
649 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = xor <16 x i16> undef, undef
650 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = xor <32 x i16> undef, undef
651 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
652 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
653 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
654 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
655 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
656 ;
361657 %I64 = xor i64 undef, undef
362 ; SSSE3: cost of 1 {{.*}} %V2I64 = xor
363 ; SSE42: cost of 1 {{.*}} %V2I64 = xor
364 ; SLM: cost of 1 {{.*}} %V2I64 = xor
365 ; GLM: cost of 1 {{.*}} %V2I64 = xor
366 ; AVX: cost of 1 {{.*}} %V2I64 = xor
367 ; AVX2: cost of 1 {{.*}} %V2I64 = xor
368 ; AVX512: cost of 1 {{.*}} %V2I64 = xor
369658 %V2I64 = xor <2 x i64> undef, undef
370 ; SSSE3: cost of 2 {{.*}} %V4I64 = xor
371 ; SSE42: cost of 2 {{.*}} %V4I64 = xor
372 ; SLM: cost of 2 {{.*}} %V4I64 = xor
373 ; GLM: cost of 2 {{.*}} %V4I64 = xor
374 ; AVX: cost of 1 {{.*}} %V4I64 = xor
375 ; AVX2: cost of 1 {{.*}} %V4I64 = xor
376 ; AVX512: cost of 1 {{.*}} %V4I64 = xor
377659 %V4I64 = xor <4 x i64> undef, undef
378 ; SSSE3: cost of 4 {{.*}} %V8I64 = xor
379 ; SSE42: cost of 4 {{.*}} %V8I64 = xor
380 ; SLM: cost of 4 {{.*}} %V8I64 = xor
381 ; GLM: cost of 4 {{.*}} %V8I64 = xor
382 ; AVX: cost of 2 {{.*}} %V8I64 = xor
383 ; AVX2: cost of 2 {{.*}} %V8I64 = xor
384 ; AVX512: cost of 1 {{.*}} %V8I64 = xor
385660 %V8I64 = xor <8 x i64> undef, undef
386661
387 ; CHECK: cost of 1 {{.*}} %I32 = xor
388662 %I32 = xor i32 undef, undef
389 ; SSSE3: cost of 1 {{.*}} %V4I32 = xor
390 ; SSE42: cost of 1 {{.*}} %V4I32 = xor
391 ; SLM: cost of 1 {{.*}} %V4I32 = xor
392 ; GLM: cost of 1 {{.*}} %V4I32 = xor
393 ; AVX: cost of 1 {{.*}} %V4I32 = xor
394 ; AVX2: cost of 1 {{.*}} %V4I32 = xor
395 ; AVX512: cost of 1 {{.*}} %V4I32 = xor
396663 %V4I32 = xor <4 x i32> undef, undef
397 ; SSSE3: cost of 2 {{.*}} %V8I32 = xor
398 ; SSE42: cost of 2 {{.*}} %V8I32 = xor
399 ; SLM: cost of 2 {{.*}} %V8I32 = xor
400 ; GLM: cost of 2 {{.*}} %V8I32 = xor
401 ; AVX: cost of 1 {{.*}} %V8I32 = xor
402 ; AVX2: cost of 1 {{.*}} %V8I32 = xor
403 ; AVX512: cost of 1 {{.*}} %V8I32 = xor
404664 %V8I32 = xor <8 x i32> undef, undef
405 ; SSSE3: cost of 4 {{.*}} %V16I32 = xor
406 ; SSE42: cost of 4 {{.*}} %V16I32 = xor
407 ; SLM: cost of 4 {{.*}} %V16I32 = xor
408 ; GLM: cost of 4 {{.*}} %V16I32 = xor
409 ; AVX: cost of 2 {{.*}} %V16I32 = xor
410 ; AVX2: cost of 2 {{.*}} %V16I32 = xor
411 ; AVX512: cost of 1 {{.*}} %V16I32 = xor
412665 %V16I32 = xor <16 x i32> undef, undef
413666
414 ; CHECK: cost of 1 {{.*}} %I16 = xor
415667 %I16 = xor i16 undef, undef
416 ; SSSE3: cost of 1 {{.*}} %V8I16 = xor
417 ; SSE42: cost of 1 {{.*}} %V8I16 = xor
418 ; SLM: cost of 1 {{.*}} %V8I16 = xor
419 ; GLM: cost of 1 {{.*}} %V8I16 = xor
420 ; AVX: cost of 1 {{.*}} %V8I16 = xor
421 ; AVX2: cost of 1 {{.*}} %V8I16 = xor
422 ; AVX512: cost of 1 {{.*}} %V8I16 = xor
423668 %V8I16 = xor <8 x i16> undef, undef
424 ; SSSE3: cost of 2 {{.*}} %V16I16 = xor
425 ; SSE42: cost of 2 {{.*}} %V16I16 = xor
426 ; SLM: cost of 2 {{.*}} %V16I16 = xor
427 ; GLM: cost of 2 {{.*}} %V16I16 = xor
428 ; AVX: cost of 1 {{.*}} %V16I16 = xor
429 ; AVX2: cost of 1 {{.*}} %V16I16 = xor
430 ; AVX512: cost of 1 {{.*}} %V16I16 = xor
431669 %V16I16 = xor <16 x i16> undef, undef
432 ; SSSE3: cost of 4 {{.*}} %V32I16 = xor
433 ; SSE42: cost of 4 {{.*}} %V32I16 = xor
434 ; SLM: cost of 4 {{.*}} %V32I16 = xor
435 ; GLM: cost of 4 {{.*}} %V32I16 = xor
436 ; AVX: cost of 2 {{.*}} %V32I16 = xor
437 ; AVX2: cost of 2 {{.*}} %V32I16 = xor
438 ; AVX512F: cost of 2 {{.*}} %V32I16 = xor
439 ; AVX512BW: cost of 1 {{.*}} %V32I16 = xor
440670 %V32I16 = xor <32 x i16> undef, undef
441671
442 ; CHECK: cost of 1 {{.*}} %I8 = xor
443672 %I8 = xor i8 undef, undef
444 ; SSSE3: cost of 1 {{.*}} %V16I8 = xor
445 ; SSE42: cost of 1 {{.*}} %V16I8 = xor
446 ; SLM: cost of 1 {{.*}} %V16I8 = xor
447 ; GLM: cost of 1 {{.*}} %V16I8 = xor
448 ; AVX: cost of 1 {{.*}} %V16I8 = xor
449 ; AVX2: cost of 1 {{.*}} %V16I8 = xor
450 ; AVX512: cost of 1 {{.*}} %V16I8 = xor
451673 %V16I8 = xor <16 x i8> undef, undef
452 ; SSSE3: cost of 2 {{.*}} %V32I8 = xor
453 ; SSE42: cost of 2 {{.*}} %V32I8 = xor
454 ; SLM: cost of 2 {{.*}} %V32I8 = xor
455 ; GLM: cost of 2 {{.*}} %V32I8 = xor
456 ; AVX: cost of 1 {{.*}} %V32I8 = xor
457 ; AVX2: cost of 1 {{.*}} %V32I8 = xor
458 ; AVX512: cost of 1 {{.*}} %V32I8 = xor
459674 %V32I8 = xor <32 x i8> undef, undef
460 ; SSSE3: cost of 4 {{.*}} %V64I8 = xor
461 ; SSE42: cost of 4 {{.*}} %V64I8 = xor
462 ; SLM: cost of 4 {{.*}} %V64I8 = xor
463 ; GLM: cost of 4 {{.*}} %V64I8 = xor
464 ; AVX: cost of 2 {{.*}} %V64I8 = xor
465 ; AVX2: cost of 2 {{.*}} %V64I8 = xor
466 ; AVX512F: cost of 2 {{.*}} %V64I8 = xor
467 ; AVX512BW: cost of 1 {{.*}} %V64I8 = xor
468675 %V64I8 = xor <64 x i8> undef, undef
469676
470677 ret i32 undef
471678 }
472679
473 ; CHECK-LABEL: 'and'
474680 define i32 @and(i32 %arg) {
475 ; CHECK: cost of 1 {{.*}} %I64 = and
681 ; SSE-LABEL: 'and'
682 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
683 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
684 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = and <4 x i64> undef, undef
685 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = and <8 x i64> undef, undef
686 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
687 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
688 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = and <8 x i32> undef, undef
689 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = and <16 x i32> undef, undef
690 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
691 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
692 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = and <16 x i16> undef, undef
693 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = and <32 x i16> undef, undef
694 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
695 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
696 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
697 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
698 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
699 ;
700 ; AVX-LABEL: 'and'
701 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
702 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
703 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
704 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = and <8 x i64> undef, undef
705 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
706 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
707 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
708 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = and <16 x i32> undef, undef
709 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
710 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
711 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
712 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = and <32 x i16> undef, undef
713 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
714 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
715 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
716 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
717 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
718 ;
719 ; AVX512F-LABEL: 'and'
720 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
721 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
722 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
723 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef
724 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
725 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
726 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
727 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef
728 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
729 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
730 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
731 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = and <32 x i16> undef, undef
732 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
733 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
734 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
735 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
736 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
737 ;
738 ; AVX512BW-LABEL: 'and'
739 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
740 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
741 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
742 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef
743 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
744 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
745 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
746 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef
747 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
748 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
749 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
750 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef
751 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
752 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
753 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
754 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef
755 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
756 ;
757 ; AVX512DQ-LABEL: 'and'
758 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
759 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
760 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
761 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef
762 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
763 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
764 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
765 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef
766 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
767 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
768 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
769 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = and <32 x i16> undef, undef
770 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
771 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
772 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
773 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
774 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
775 ;
776 ; SLM-LABEL: 'and'
777 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
778 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
779 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = and <4 x i64> undef, undef
780 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = and <8 x i64> undef, undef
781 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
782 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
783 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = and <8 x i32> undef, undef
784 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = and <16 x i32> undef, undef
785 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
786 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
787 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = and <16 x i16> undef, undef
788 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = and <32 x i16> undef, undef
789 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
790 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
791 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
792 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
793 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
794 ;
795 ; GLM-LABEL: 'and'
796 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
797 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
798 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = and <4 x i64> undef, undef
799 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = and <8 x i64> undef, undef
800 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
801 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
802 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = and <8 x i32> undef, undef
803 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = and <16 x i32> undef, undef
804 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
805 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
806 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = and <16 x i16> undef, undef
807 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = and <32 x i16> undef, undef
808 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
809 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
810 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
811 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
812 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
813 ;
476814 %I64 = and i64 undef, undef
477 ; SSSE3: cost of 1 {{.*}} %V2I64 = and
478 ; SSE42: cost of 1 {{.*}} %V2I64 = and
479 ; SLM: cost of 1 {{.*}} %V2I64 = and
480 ; GLM: cost of 1 {{.*}} %V2I64 = and
481 ; AVX: cost of 1 {{.*}} %V2I64 = and
482 ; AVX2: cost of 1 {{.*}} %V2I64 = and
483 ; AVX512: cost of 1 {{.*}} %V2I64 = and
484815 %V2I64 = and <2 x i64> undef, undef
485 ; SSSE3: cost of 2 {{.*}} %V4I64 = and
486 ; SSE42: cost of 2 {{.*}} %V4I64 = and
487 ; SLM: cost of 2 {{.*}} %V4I64 = and
488 ; GLM: cost of 2 {{.*}} %V4I64 = and
489 ; AVX: cost of 1 {{.*}} %V4I64 = and
490 ; AVX2: cost of 1 {{.*}} %V4I64 = and
491 ; AVX512: cost of 1 {{.*}} %V4I64 = and
492816 %V4I64 = and <4 x i64> undef, undef
493 ; SSSE3: cost of 4 {{.*}} %V8I64 = and
494 ; SSE42: cost of 4 {{.*}} %V8I64 = and
495 ; SLM: cost of 4 {{.*}} %V8I64 = and
496 ; GLM: cost of 4 {{.*}} %V8I64 = and
497 ; AVX: cost of 2 {{.*}} %V8I64 = and
498 ; AVX2: cost of 2 {{.*}} %V8I64 = and
499 ; AVX512: cost of 1 {{.*}} %V8I64 = and
500817 %V8I64 = and <8 x i64> undef, undef
501818
502 ; CHECK: cost of 1 {{.*}} %I32 = and
503819 %I32 = and i32 undef, undef
504 ; SSSE3: cost of 1 {{.*}} %V4I32 = and
505 ; SSE42: cost of 1 {{.*}} %V4I32 = and
506 ; SLM: cost of 1 {{.*}} %V4I32 = and
507 ; GLM: cost of 1 {{.*}} %V4I32 = and
508 ; AVX: cost of 1 {{.*}} %V4I32 = and
509 ; AVX2: cost of 1 {{.*}} %V4I32 = and
510 ; AVX512: cost of 1 {{.*}} %V4I32 = and
511820 %V4I32 = and <4 x i32> undef, undef
512 ; SSSE3: cost of 2 {{.*}} %V8I32 = and
513 ; SSE42: cost of 2 {{.*}} %V8I32 = and
514 ; SLM: cost of 2 {{.*}} %V8I32 = and
515 ; GLM: cost of 2 {{.*}} %V8I32 = and
516 ; AVX: cost of 1 {{.*}} %V8I32 = and
517 ; AVX2: cost of 1 {{.*}} %V8I32 = and
518 ; AVX512: cost of 1 {{.*}} %V8I32 = and
519821 %V8I32 = and <8 x i32> undef, undef
520 ; SSSE3: cost of 4 {{.*}} %V16I32 = and
521 ; SSE42: cost of 4 {{.*}} %V16I32 = and
522 ; SLM: cost of 4 {{.*}} %V16I32 = and
523 ; GLM: cost of 4 {{.*}} %V16I32 = and
524 ; AVX: cost of 2 {{.*}} %V16I32 = and
525 ; AVX2: cost of 2 {{.*}} %V16I32 = and
526 ; AVX512: cost of 1 {{.*}} %V16I32 = and
527822 %V16I32 = and <16 x i32> undef, undef
528823
529 ; CHECK: cost of 1 {{.*}} %I16 = and
530824 %I16 = and i16 undef, undef
531 ; SSSE3: cost of 1 {{.*}} %V8I16 = and
532 ; SSE42: cost of 1 {{.*}} %V8I16 = and
533 ; SLM: cost of 1 {{.*}} %V8I16 = and
534 ; GLM: cost of 1 {{.*}} %V8I16 = and
535 ; AVX: cost of 1 {{.*}} %V8I16 = and
536 ; AVX2: cost of 1 {{.*}} %V8I16 = and
537 ; AVX512: cost of 1 {{.*}} %V8I16 = and
538825 %V8I16 = and <8 x i16> undef, undef
539 ; SSSE3: cost of 2 {{.*}} %V16I16 = and
540 ; SSE42: cost of 2 {{.*}} %V16I16 = and
541 ; SLM: cost of 2 {{.*}} %V16I16 = and
542 ; GLM: cost of 2 {{.*}} %V16I16 = and
543 ; AVX: cost of 1 {{.*}} %V16I16 = and
544 ; AVX2: cost of 1 {{.*}} %V16I16 = and
545 ; AVX512: cost of 1 {{.*}} %V16I16 = and
546826 %V16I16 = and <16 x i16> undef, undef
547 ; SSSE3: cost of 4 {{.*}} %V32I16 = and
548 ; SSE42: cost of 4 {{.*}} %V32I16 = and
549 ; SLM: cost of 4 {{.*}} %V32I16 = and
550 ; GLM: cost of 4 {{.*}} %V32I16 = and
551 ; AVX: cost of 2 {{.*}} %V32I16 = and
552 ; AVX2: cost of 2 {{.*}} %V32I16 = and
553 ; AVX512F: cost of 2 {{.*}} %V32I16 = and
554 ; AVX512BW: cost of 1 {{.*}} %V32I16 = and
555827 %V32I16 = and <32 x i16> undef, undef
556828
557 ; CHECK: cost of 1 {{.*}} %I8 = and
558829 %I8 = and i8 undef, undef
559 ; SSSE3: cost of 1 {{.*}} %V16I8 = and
560 ; SSE42: cost of 1 {{.*}} %V16I8 = and
561 ; SLM: cost of 1 {{.*}} %V16I8 = and
562 ; GLM: cost of 1 {{.*}} %V16I8 = and
563 ; AVX: cost of 1 {{.*}} %V16I8 = and
564 ; AVX2: cost of 1 {{.*}} %V16I8 = and
565 ; AVX512: cost of 1 {{.*}} %V16I8 = and
566830 %V16I8 = and <16 x i8> undef, undef
567 ; SSSE3: cost of 2 {{.*}} %V32I8 = and
568 ; SSE42: cost of 2 {{.*}} %V32I8 = and
569 ; SLM: cost of 2 {{.*}} %V32I8 = and
570 ; GLM: cost of 2 {{.*}} %V32I8 = and
571 ; AVX: cost of 1 {{.*}} %V32I8 = and
572 ; AVX2: cost of 1 {{.*}} %V32I8 = and
573 ; AVX512: cost of 1 {{.*}} %V32I8 = and
574831 %V32I8 = and <32 x i8> undef, undef
575 ; SSSE3: cost of 4 {{.*}} %V64I8 = and
576 ; SSE42: cost of 4 {{.*}} %V64I8 = and
577 ; SLM: cost of 4 {{.*}} %V64I8 = and
578 ; GLM: cost of 4 {{.*}} %V64I8 = and
579 ; AVX: cost of 2 {{.*}} %V64I8 = and
580 ; AVX2: cost of 2 {{.*}} %V64I8 = and
581 ; AVX512F: cost of 2 {{.*}} %V64I8 = and
582 ; AVX512BW: cost of 1 {{.*}} %V64I8 = and
583832 %V64I8 = and <64 x i8> undef, undef
584833
585834 ret i32 undef
586835 }
587836
588 ; CHECK-LABEL: 'mul'
589837 define i32 @mul(i32 %arg) {
590 ; CHECK: cost of 1 {{.*}} %I64 = mul
838 ; SSSE3-LABEL: 'mul'
839 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
840 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef
841 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = mul <4 x i64> undef, undef
842 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = mul <8 x i64> undef, undef
843 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
844 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = mul <4 x i32> undef, undef
845 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = mul <8 x i32> undef, undef
846 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = mul <16 x i32> undef, undef
847 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
848 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
849 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = mul <16 x i16> undef, undef
850 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = mul <32 x i16> undef, undef
851 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
852 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
853 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = mul <32 x i8> undef, undef
854 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = mul <64 x i8> undef, undef
855 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
856 ;
857 ; SSE42-LABEL: 'mul'
858 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
859 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef
860 ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = mul <4 x i64> undef, undef
861 ; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = mul <8 x i64> undef, undef
862 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
863 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = mul <4 x i32> undef, undef
864 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = mul <8 x i32> undef, undef
865 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = mul <16 x i32> undef, undef
866 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
867 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
868 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = mul <16 x i16> undef, undef
869 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = mul <32 x i16> undef, undef
870 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
871 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
872 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = mul <32 x i8> undef, undef
873 ; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = mul <64 x i8> undef, undef
874 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
875 ;
876 ; AVX1-LABEL: 'mul'
877 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
878 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef
879 ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = mul <4 x i64> undef, undef
880 ; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = mul <8 x i64> undef, undef
881 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
882 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = mul <4 x i32> undef, undef
883 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = mul <8 x i32> undef, undef
884 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = mul <16 x i32> undef, undef
885 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
886 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
887 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = mul <16 x i16> undef, undef
888 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = mul <32 x i16> undef, undef
889 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
890 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
891 ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = mul <32 x i8> undef, undef
892 ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = mul <64 x i8> undef, undef
893 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
894 ;
895 ; AVX2-LABEL: 'mul'
896 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
897 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef
898 ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = mul <4 x i64> undef, undef
899 ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = mul <8 x i64> undef, undef
900 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
901 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = mul <4 x i32> undef, undef
902 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = mul <8 x i32> undef, undef
903 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = mul <16 x i32> undef, undef
904 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
905 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
906 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
907 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = mul <32 x i16> undef, undef
908 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
909 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = mul <16 x i8> undef, undef
910 ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = mul <32 x i8> undef, undef
911 ; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = mul <64 x i8> undef, undef
912 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
913 ;
914 ; AVX512F-LABEL: 'mul'
915 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
916 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef
917 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = mul <4 x i64> undef, undef
918 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = mul <8 x i64> undef, undef
919 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
920 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef
921 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef
922 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef
923 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
924 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
925 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
926 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = mul <32 x i16> undef, undef
927 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
928 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
929 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = mul <32 x i8> undef, undef
930 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = mul <64 x i8> undef, undef
931 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
932 ;
933 ; AVX512BW-LABEL: 'mul'
934 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
935 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef
936 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = mul <4 x i64> undef, undef
937 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = mul <8 x i64> undef, undef
938 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
939 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef
940 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef
941 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef
942 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
943 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
944 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
945 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = mul <32 x i16> undef, undef
946 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
947 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = mul <16 x i8> undef, undef
948 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = mul <32 x i8> undef, undef
949 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = mul <64 x i8> undef, undef
950 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
951 ;
952 ; AVX512DQ-LABEL: 'mul'
953 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
954 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef
955 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = mul <4 x i64> undef, undef
956 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = mul <8 x i64> undef, undef
957 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
958 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef
959 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = mul <8 x i32> undef, undef
960 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = mul <16 x i32> undef, undef
961 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
962 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
963 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = mul <16 x i16> undef, undef
964 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = mul <32 x i16> undef, undef
965 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
966 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
967 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = mul <32 x i8> undef, undef
968 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = mul <64 x i8> undef, undef
969 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
970 ;
971 ; SLM-LABEL: 'mul'
972 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
973 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = mul <2 x i64> undef, undef
974 ; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = mul <4 x i64> undef, undef
975 ; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = mul <8 x i64> undef, undef
976 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
977 ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = mul <4 x i32> undef, undef
978 ; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = mul <8 x i32> undef, undef
979 ; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = mul <16 x i32> undef, undef
980 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
981 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = mul <8 x i16> undef, undef
982 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = mul <16 x i16> undef, undef
983 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = mul <32 x i16> undef, undef
984 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
985 ; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = mul <16 x i8> undef, undef
986 ; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = mul <32 x i8> undef, undef
987 ; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = mul <64 x i8> undef, undef
988 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
989 ;
990 ; GLM-LABEL: 'mul'
991 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
992 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef
993 ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = mul <4 x i64> undef, undef
994 ; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = mul <8 x i64> undef, undef
995 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, undef
996 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = mul <4 x i32> undef, undef
997 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = mul <8 x i32> undef, undef
998 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = mul <16 x i32> undef, undef
999 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef
1000 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef
1001 ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = mul <16 x i16> undef, undef
1002 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = mul <32 x i16> undef, undef
1003 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef
1004 ; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef
1005 ; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = mul <32 x i8> undef, undef
1006 ; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = mul <64 x i8> undef, undef
1007 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1008 ;
5911009 %I64 = mul i64 undef, undef
592 ; SSSE3: cost of 8 {{.*}} %V2I64 = mul
593 ; SSE42: cost of 8 {{.*}} %V2I64 = mul
594 ; SLM: cost of 17 {{.*}} %V2I64 = mul
595 ; GLM: cost of 8 {{.*}} %V2I64 = mul
596 ; AVX: cost of 8 {{.*}} %V2I64 = mul
597 ; AVX2: cost of 8 {{.*}} %V2I64 = mul
598 ; AVX512F: cost of 8 {{.*}} %V2I64 = mul
599 ; AVX512BW: cost of 8 {{.*}} %V2I64 = mul
600 ; AVX512DQ: cost of 1 {{.*}} %V2I64 = mul
6011010 %V2I64 = mul <2 x i64> undef, undef
602 ; SSSE3: cost of 16 {{.*}} %V4I64 = mul
603 ; SSE42: cost of 16 {{.*}} %V4I64 = mul
604 ; SLM: cost of 34 {{.*}} %V4I64 = mul
605 ; GLM: cost of 16 {{.*}} %V4I64 = mul
606 ; AVX: cost of 18 {{.*}} %V4I64 = mul
607 ; AVX2: cost of 8 {{.*}} %V4I64 = mul
608 ; AVX512F: cost of 8 {{.*}} %V4I64 = mul
609 ; AVX512BW: cost of 8 {{.*}} %V4I64 = mul
610 ; AVX512DQ: cost of 1 {{.*}} %V4I64 = mul
6111011 %V4I64 = mul <4 x i64> undef, undef
612 ; SSSE3: cost of 32 {{.*}} %V8I64 = mul
613 ; SSE42: cost of 32 {{.*}} %V8I64 = mul
614 ; SLM: cost of 68 {{.*}} %V8I64 = mul
615 ; GLM: cost of 32 {{.*}} %V8I64 = mul
616 ; AVX: cost of 36 {{.*}} %V8I64 = mul
617 ; AVX2: cost of 16 {{.*}} %V8I64 = mul
618 ; AVX512F: cost of 8 {{.*}} %V8I64 = mul
619 ; AVX512BW: cost of 8 {{.*}} %V8I64 = mul
620 ; AVX512DQ: cost of 1 {{.*}} %V8I64 = mul
6211012 %V8I64 = mul <8 x i64> undef, undef
6221013
623 ; CHECK: cost of 1 {{.*}} %I32 = mul
6241014 %I32 = mul i32 undef, undef
625 ; SSSE3: cost of 6 {{.*}} %V4I32 = mul
626 ; SSE42: cost of 2 {{.*}} %V4I32 = mul
627 ; SLM: cost of 11 {{.*}} %V4I32 = mul
628 ; GLM: cost of 2 {{.*}} %V4I32 = mul
629 ; AVX: cost of 2 {{.*}} %V4I32 = mul
630 ; AVX2: cost of 2 {{.*}} %V4I32 = mul
631 ; AVX512: cost of 1 {{.*}} %V4I32 = mul
6321015 %V4I32 = mul <4 x i32> undef, undef
633 ; SSSE3: cost of 12 {{.*}} %V8I32 = mul
634 ; SSE42: cost of 4 {{.*}} %V8I32 = mul
635 ; SLM: cost of 22 {{.*}} %V8I32 = mul
636 ; GLM: cost of 4 {{.*}} %V8I32 = mul
637 ; AVX: cost of 4 {{.*}} %V8I32 = mul
638 ; AVX2: cost of 2 {{.*}} %V8I32 = mul
639 ; AVX512: cost of 1 {{.*}} %V8I32 = mul
6401016 %V8I32 = mul <8 x i32> undef, undef
641 ; SSSE3: cost of 24 {{.*}} %V16I32 = mul
642 ; SSE42: cost of 8 {{.*}} %V16I32 = mul
643 ; SLM: cost of 44 {{.*}} %V16I32 = mul
644 ; GLM: cost of 8 {{.*}} %V16I32 = mul
645 ; AVX: cost of 8 {{.*}} %V16I32 = mul
646 ; AVX2: cost of 4 {{.*}} %V16I32 = mul
647 ; AVX512: cost of 1 {{.*}} %V16I32 = mul
6481017 %V16I32 = mul <16 x i32> undef, undef
6491018
650 ; CHECK: cost of 1 {{.*}} %I16 = mul
6511019 %I16 = mul i16 undef, undef
652 ; SSSE3: cost of 1 {{.*}} %V8I16 = mul
653 ; SSE42: cost of 1 {{.*}} %V8I16 = mul
654 ; SLM: cost of 2 {{.*}} %V8I16 = mul
655 ; GLM: cost of 1 {{.*}} %V8I16 = mul
656 ; AVX: cost of 1 {{.*}} %V8I16 = mul
657 ; AVX2: cost of 1 {{.*}} %V8I16 = mul
658 ; AVX512: cost of 1 {{.*}} %V8I16 = mul
6591020 %V8I16 = mul <8 x i16> undef, undef
660 ; SSSE3: cost of 2 {{.*}} %V16I16 = mul
661 ; SSE42: cost of 2 {{.*}} %V16I16 = mul
662 ; SLM: cost of 4 {{.*}} %V16I16 = mul
663 ; GLM: cost of 2 {{.*}} %V16I16 = mul
664 ; AVX: cost of 4 {{.*}} %V16I16 = mul
665 ; AVX2: cost of 1 {{.*}} %V16I16 = mul
666 ; AVX512: cost of 1 {{.*}} %V16I16 = mul
6671021 %V16I16 = mul <16 x i16> undef, undef
668 ; SSSE3: cost of 4 {{.*}} %V32I16 = mul
669 ; SSE42: cost of 4 {{.*}} %V32I16 = mul
670 ; SLM: cost of 8 {{.*}} %V32I16 = mul
671 ; GLM: cost of 4 {{.*}} %V32I16 = mul
672 ; AVX: cost of 8 {{.*}} %V32I16 = mul
673 ; AVX2: cost of 2 {{.*}} %V32I16 = mul
674 ; AVX512F: cost of 2 {{.*}} %V32I16 = mul
675 ; AVX512BW: cost of 1 {{.*}} %V32I16 = mul
6761022 %V32I16 = mul <32 x i16> undef, undef
6771023
678 ; CHECK: cost of 1 {{.*}} %I8 = mul
6791024 %I8 = mul i8 undef, undef
680 ; SSSE3: cost of 12 {{.*}} %V16I8 = mul
681 ; SSE42: cost of 12 {{.*}} %V16I8 = mul
682 ; SLM: cost of 14 {{.*}} %V16I8 = mul
683 ; GLM: cost of 12 {{.*}} %V16I8 = mul
684 ; AVX: cost of 12 {{.*}} %V16I8 = mul
685 ; AVX2: cost of 7 {{.*}} %V16I8 = mul
686 ; AVX512F: cost of 5 {{.*}} %V16I8 = mul
687 ; AVX512BW: cost of 4 {{.*}} %V16I8 = mul
6881025 %V16I8 = mul <16 x i8> undef, undef
689 ; SSSE3: cost of 24 {{.*}} %V32I8 = mul
690 ; SSE42: cost of 24 {{.*}} %V32I8 = mul
691 ; SLM: cost of 28 {{.*}} %V32I8 = mul
692 ; GLM: cost of 24 {{.*}} %V32I8 = mul
693 ; AVX: cost of 26 {{.*}} %V32I8 = mul
694 ; AVX2: cost of 17 {{.*}} %V32I8 = mul
695 ; AVX512F: cost of 13 {{.*}} %V32I8 = mul
696 ; AVX512BW: cost of 4 {{.*}} %V32I8 = mul
6971026 %V32I8 = mul <32 x i8> undef, undef
698 ; SSSE3: cost of 48 {{.*}} %V64I8 = mul
699 ; SSE42: cost of 48 {{.*}} %V64I8 = mul
700 ; SLM: cost of 56 {{.*}} %V64I8 = mul
701 ; GLM: cost of 48 {{.*}} %V64I8 = mul
702 ; AVX: cost of 52 {{.*}} %V64I8 = mul
703 ; AVX2: cost of 34 {{.*}} %V64I8 = mul
704 ; AVX512F: cost of 26 {{.*}} %V64I8 = mul
705 ; AVX512BW: cost of 11 {{.*}} %V64I8 = mul
7061027 %V64I8 = mul <64 x i8> undef, undef
7071028
7081029 ret i32 undef
7091030 }
7101031
711 ; CHECK-LABEL: 'mul_2i32'
1032 ; A <2 x i32> gets expanded to a <2 x i64> vector.
1033 ; A <2 x i64> vector multiply is implemented using
1034 ; 3 PMULUDQ and 2 PADDS and 4 shifts.
7121035 define void @mul_2i32() {
713 ; A <2 x i32> gets expanded to a <2 x i64> vector.
714 ; A <2 x i64> vector multiply is implemented using
715 ; 3 PMULUDQ and 2 PADDS and 4 shifts.
716 ; SSSE3: cost of 8 {{.*}} %A0 = mul
717 ; SSE42: cost of 8 {{.*}} %A0 = mul
718 ; SLM: cost of 17 {{.*}} %A0 = mul
719 ; GLM: cost of 8 {{.*}} %A0 = mul
720 ; AVX: cost of 8 {{.*}} %A0 = mul
721 ; AVX2: cost of 8 {{.*}} %A0 = mul
722 ; AVX512F: cost of 8 {{.*}} %A0 = mul
723 ; AVX512BW: cost of 8 {{.*}} %A0 = mul
724 ; AVX512DQ: cost of 1 {{.*}} %A0 = mul
1036 ; SSE-LABEL: 'mul_2i32'
1037 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef
1038 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1039 ;
1040 ; AVX-LABEL: 'mul_2i32'
1041 ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef
1042 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1043 ;
1044 ; AVX512F-LABEL: 'mul_2i32'
1045 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef
1046 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1047 ;
1048 ; AVX512BW-LABEL: 'mul_2i32'
1049 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef
1050 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1051 ;
1052 ; AVX512DQ-LABEL: 'mul_2i32'
1053 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A0 = mul <2 x i32> undef, undef
1054 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1055 ;
1056 ; SLM-LABEL: 'mul_2i32'
1057 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %A0 = mul <2 x i32> undef, undef
1058 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1059 ;
1060 ; GLM-LABEL: 'mul_2i32'
1061 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef
1062 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1063 ;
7251064 %A0 = mul <2 x i32> undef, undef
7261065
7271066 ret void
217217 def add_asm_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
218218 # Label format is based on ASM string.
219219 check_label_format = '{} %s-LABEL: %s:'.format(comment_marker)
220 common.add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, True)
220 common.add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, True, False)
5252 r'(\s+)?[^)]*[^{]*\{\n(?P.*?)^\}$',
5353 flags=(re.M | re.S))
5454
55 ANALYZE_FUNCTION_RE = re.compile(
56 r'^\s*\'(?P[\w\s-]+?)\'\s+for\s+function\s+\'(?P[\w-]+?)\':'
57 r'\s*\n(?P.*)$',
58 flags=(re.X | re.S))
59
5560 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
5661 TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
5762 TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
8186 continue
8287 func = m.group('func')
8388 scrubbed_body = scrubber(m.group('body'), *scrubber_args)
89 if m.groupdict().has_key('analysis'):
90 analysis = m.group('analysis')
91 if analysis.lower() != 'cost model analysis':
92 print('WARNING: Unsupported analysis mode: %r!' % (analysis,), file=sys.stderr)
8493 if func.startswith('stress'):
8594 # We only use the last line of the function body for stress tests.
8695 scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
126135 return '[[' + get_value_name(var) + ']]'
127136
128137 # Replace IR value defs and uses with FileCheck variables.
129 def genericize_check_lines(lines):
138 def genericize_check_lines(lines, is_analyze):
130139 # This gets called for each match that occurs in
131140 # a line. We transform variables we haven't seen
132141 # into defs, and variables we have seen into uses.
151160 line = line.replace('%.', '%dot')
152161 # Ignore any comments, since the check lines will too.
153162 scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
154 lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
163 if is_analyze == False:
164 lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
165 else:
166 lines[i] = scrubbed_line
155167 return lines
156168
157169
158 def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm):
170 def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
159171 printed_prefixes = []
160172 for p in prefix_list:
161173 checkprefixes = p[0]
186198
187199 # For IR output, change all defs to FileCheck variables, so we're immune
188200 # to variable naming fashions.
189 func_body = genericize_check_lines(func_body)
201 func_body = genericize_check_lines(func_body, is_analyze)
190202
191203 # This could be selectively enabled with an optional invocation argument.
192204 # Disabled for now: better to check everything. Be safe rather than sorry.
225237 def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
226238 # Label format is based on IR string.
227239 check_label_format = '{} %s-LABEL: @%s('.format(comment_marker)
228 add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False)
240 add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, False)
241
242 def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
243 check_label_format = '{} %s-LABEL: \'%s\''.format(comment_marker)
244 add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)
0 #!/usr/bin/env python2.7
1
2 """A script to generate FileCheck statements for 'opt' analysis tests.
3
4 This script is a utility to update LLVM opt analysis test cases with new
5 FileCheck patterns. It can either update all of the tests in the file or
6 a single test function.
7
8 Example usage:
9 $ update_analyze_test_checks.py --opt-binary=../bin/opt test/foo.ll
10
11 Workflow:
12 1. Make a compiler patch that requires updating some number of FileCheck lines
13 in regression test files.
14 2. Save the patch and revert it from your local work area.
15 3. Update the RUN-lines in the affected regression tests to look canonical.
16 Example: "; RUN: opt < %s -analyze -cost-model -S | FileCheck %s"
17 4. Refresh the FileCheck lines for either the entire file or select functions by
18 running this script.
19 5. Commit the fresh baseline of checks.
20 6. Apply your patch from step 1 and rebuild your local binaries.
21 7. Re-run this script on affected regression tests.
22 8. Check the diffs to ensure the script has done something reasonable.
23 9. Submit a patch including the regression test diffs for review.
24
25 A common pattern is to have the script insert complete checking of every
26 instruction. Then, edit it down to only check the relevant instructions.
27 The script is designed to make adding checks to a test case fast, it is *not*
28 designed to be authoritative about what constitutes a good test!
29 """
30
31 import argparse
32 import itertools
33 import os # Used to advertise this file's name ("autogenerated_note").
34 import string
35 import subprocess
36 import sys
37 import tempfile
38 import re
39
40 from UpdateTestChecks import common
41
# Prefix of the note written as the first line of every regenerated test; an
# existing line starting with this text is dropped before a new one is added.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# Matches the 'define' line of an IR function and captures the function name
# (the identifier between '@' and the opening parenthesis).  Written as a raw
# string so that \s, \w and \( reach the regex engine unchanged instead of
# being treated as (invalid) string escape sequences.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
47
48
49
50
51
def _log(message):
    """Write one line of diagnostic text to stderr."""
    sys.stderr.write(message + '\n')


def main():
    """Regenerate the FileCheck lines of each test file given on the command line.

    For every test file:
      1. collect its RUN lines (joining backslash-continued ones),
      2. keep the RUN lines of the form "opt ... | FileCheck ..." and record
         their opt arguments and check prefixes,
      3. run opt once per kept RUN line and let `common` build the
         per-prefix dictionary of function bodies,
      4. rewrite the file in place, replacing the check lines of each
         function (or only of --function) with freshly generated ones.

    Exits with status 1 if the basename of --opt-binary is not "opt".
    """
    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--opt-binary', default='opt',
                        help='The opt binary used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

    opt_basename = os.path.basename(args.opt_binary)
    if opt_basename != "opt":
        _log('ERROR: Unexpected opt name: ' + opt_basename)
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            _log('Scanning for RUN lines in test file: %s' % (test,))
        with open(test) as f:
            input_lines = [line.rstrip() for line in f]

        # Collect the RUN lines, gluing a line that ends in a backslash to the
        # RUN line that follows it.
        raw_lines = [m.group(1)
                     for m in [common.RUN_LINE_RE.match(line) for line in input_lines] if m]
        run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
        for raw_line in raw_lines[1:]:
            if run_lines[-1].endswith("\\"):
                run_lines[-1] = run_lines[-1].rstrip("\\") + " " + raw_line
            else:
                run_lines.append(raw_line)

        if args.verbose:
            _log('Found %d RUN lines:' % (len(run_lines),))
            for run_line in run_lines:
                _log(' RUN: ' + run_line)

        prefix_list = []
        for run_line in run_lines:
            # A RUN line without a pipe cannot be split into a tool command and
            # a FileCheck command; skip it instead of failing on the unpacking.
            if '|' not in run_line:
                _log('WARNING: Skipping unparseable RUN line: ' + run_line)
                continue

            (tool_cmd, filecheck_cmd) = tuple([cmd.strip() for cmd in run_line.split('|', 1)])

            if not tool_cmd.startswith(opt_basename + ' '):
                _log('WARNING: Skipping non-%s RUN line: %s' % (opt_basename, run_line))
                continue

            if not filecheck_cmd.startswith('FileCheck '):
                _log('WARNING: Skipping non-FileChecked RUN line: ' + run_line)
                continue

            # Drop the tool name and the test-file placeholder; the test path
            # is passed to common.invoke_tool separately.
            tool_cmd_args = tool_cmd[len(opt_basename):].strip()
            tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

            check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                              for item in m.group(1).split(',')]
            if not check_prefixes:
                check_prefixes = ['CHECK']

            # Every prefix of this RUN line is recorded with its opt arguments.
            # FIXME: We should use multiple check prefixes to common check lines.
            prefix_list.append((check_prefixes, tool_cmd_args))

        # One (initially empty) function dictionary per check prefix.
        func_dict = {}
        for prefixes, _ in prefix_list:
            for prefix in prefixes:
                func_dict[prefix] = {}
        for prefixes, opt_args in prefix_list:
            if args.verbose:
                _log('Extracted opt cmd: ' + opt_basename + ' ' + opt_args)
                _log('Extracted FileCheck prefixes: ' + str(prefixes))

            raw_tool_outputs = common.invoke_tool(args.opt_binary, opt_args, test)

            # Split analysis outputs by "Printing analysis " declarations.
            for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
                common.build_function_body_dictionary(
                    common.ANALYZE_FUNCTION_RE, common.scrub_body, [],
                    raw_tool_output, prefixes, func_dict, args.verbose)

        is_in_function = False
        is_in_function_start = False
        prefix_set = {prefix for prefixes, _ in prefix_list for prefix in prefixes}
        if args.verbose:
            _log('Rewriting FileCheck prefixes: %s' % (prefix_set,))
        output_lines = [autogenerated_note]

        for input_line in input_lines:
            if is_in_function_start:
                # Between the 'define' line and the first body line: drop blank
                # lines, keep comments that are not check lines for one of our
                # prefixes, then emit the generated checks exactly once.
                if input_line == '':
                    continue
                if input_line.lstrip().startswith(';'):
                    m = common.CHECK_RE.match(input_line)
                    if not m or m.group(1) not in prefix_set:
                        output_lines.append(input_line)
                        continue

                # Print out the various check lines here.
                common.add_analyze_checks(output_lines, ';', prefix_list, func_dict, func_name)
                is_in_function_start = False

            if is_in_function:
                if common.should_add_line_to_output(input_line, prefix_set):
                    # This input line of the function body will go as-is into the output.
                    # Except make leading whitespace uniform: 2 spaces.
                    input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
                    output_lines.append(input_line)
                else:
                    continue
                if input_line.strip() == '}':
                    is_in_function = False
                continue

            # Discard any previous script advertising.
            if input_line.startswith(ADVERT):
                continue

            # If it's outside a function, it just gets copied to the output.
            output_lines.append(input_line)

            m = IR_FUNCTION_RE.match(input_line)
            if not m:
                continue
            func_name = m.group(1)
            if args.function is not None and func_name != args.function:
                # When filtering on a specific function, skip all others.
                continue
            is_in_function = is_in_function_start = True

        if args.verbose:
            _log('Writing %d lines to %s...' % (len(output_lines), test))

        # The file is opened in binary mode so that '\n' is written untranslated.
        # Binary mode needs bytes: a byte string is written unchanged, a text
        # string is encoded first.
        payload = ''.join([line + '\n' for line in output_lines])
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')
        with open(test, 'wb') as f:
            f.write(payload)
187
188
189 if __name__ == '__main__':
190 main()