From b143c981693a0f405f16eade1cccf4933fde8e21 Mon Sep 17 00:00:00 2001
From: Xiaolin Zhao
Date: Sat, 16 Nov 2024 16:27:20 +0800
Subject: [PATCH] cmd/compile: simplify bounded shift on loong64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the shiftIsBounded function to generate more efficient shift
instructions. This change also optimizes shift ops when the shift
amount is of the form v&63 or v&31.

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000-HV @ 2500.00MHz
                | CL 627855 | this CL |
                | sec/op | sec/op vs base |
LeadingZeros 1.1005n ± 0% 0.8425n ± 1% -23.44% (p=0.000 n=10)
LeadingZeros8 1.502n ± 0% 1.501n ± 0% -0.07% (p=0.001 n=10)
LeadingZeros16 1.502n ± 0% 1.501n ± 0% -0.07% (p=0.000 n=10)
LeadingZeros32 0.9511n ± 0% 0.8050n ± 0% -15.36% (p=0.000 n=10)
LeadingZeros64 1.1195n ± 0% 0.8423n ± 0% -24.76% (p=0.000 n=10)
TrailingZeros 0.8086n ± 0% 0.8005n ± 0% -1.00% (p=0.000 n=10)
TrailingZeros8 1.031n ± 1% 1.035n ± 1% ~ (p=0.136 n=10)
TrailingZeros16 0.8114n ± 0% 0.8254n ± 1% +1.73% (p=0.000 n=10)
TrailingZeros32 0.8090n ± 0% 0.8005n ± 0% -1.05% (p=0.000 n=10)
TrailingZeros64 0.8089n ± 1% 0.8005n ± 0% -1.04% (p=0.000 n=10)
OnesCount 0.8677n ± 0% 1.2010n ± 0% +38.41% (p=0.000 n=10)
OnesCount8 0.8009n ± 0% 0.8004n ± 0% -0.06% (p=0.000 n=10)
OnesCount16 0.9344n ± 0% 1.2010n ± 0% +28.53% (p=0.000 n=10)
OnesCount32 0.8677n ± 0% 1.2010n ± 0% +38.41% (p=0.000 n=10)
OnesCount64 1.2010n ± 0% 0.8671n ± 0% -27.80% (p=0.000 n=10)
RotateLeft 0.8009n ± 0% 0.6671n ± 0% -16.71% (p=0.000 n=10)
RotateLeft8 1.202n ± 0% 1.327n ± 0% +10.40% (p=0.000 n=10)
RotateLeft16 0.8036n ± 0% 0.8218n ± 0% +2.26% (p=0.000 n=10)
RotateLeft32 0.6674n ± 0% 0.8004n ± 0% +19.94% (p=0.000 n=10)
RotateLeft64 0.6674n ± 0% 0.8004n ± 0% +19.94% (p=0.000 n=10)
Reverse 0.4067n ± 1% 0.4122n ± 1% +1.38% (p=0.001 n=10)
Reverse8 0.8009n ± 0% 0.8004n ± 0% -0.06% (p=0.000 n=10)
Reverse16 0.8009n ± 0% 0.8005n ± 0% -0.05% (p=0.000 n=10)
Reverse32 0.8009n ± 0% 0.8004n ± 0% -0.06% (p=0.001 n=10)
Reverse64 0.8009n ± 0% 0.8004n ± 0% -0.06% (p=0.008 n=10)
ReverseBytes 0.4057n ± 1% 0.4133n ± 1% +1.90% (p=0.000 n=10)
ReverseBytes16 0.8009n ± 0% 0.8004n ± 0% -0.07% (p=0.000 n=10)
ReverseBytes32 0.8009n ± 0% 0.8005n ± 0% -0.05% (p=0.000 n=10)
ReverseBytes64 0.8009n ± 0% 0.8004n ± 0% -0.06% (p=0.000 n=10)
Add 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Add64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Add64multiple 1.832n ± 0% 1.828n ± 0% -0.22% (p=0.001 n=10)
Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10)
Sub32 1.602n ± 0% 1.601n ± 0% -0.06% (p=0.000 n=10)
Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10)
Sub64multiple 2.402n ± 0% 2.400n ± 0% -0.10% (p=0.000 n=10)
Mul 0.8009n ± 0% 0.8004n ± 0% -0.06% (p=0.000 n=10)
Mul32 0.8009n ± 0% 0.8004n ± 0% -0.06% (p=0.000 n=10)
Mul64 0.8008n ± 0% 0.8004n ± 0% -0.05% (p=0.000 n=10)
Div 9.083n ± 0% 7.638n ± 0% -15.91% (p=0.000 n=10)
Div32 4.011n ± 0% 4.009n ± 0% -0.05% (p=0.000 n=10)
Div64 9.711n ± 0% 8.204n ± 0% -15.51% (p=0.000 n=10)
geomean 1.083n 1.078n -0.40%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
                | CL 627855 | this CL |
                | sec/op | sec/op vs base |
LeadingZeros 1.341n ± 4% 1.331n ± 2% -0.71% (p=0.008 n=10)
LeadingZeros8 1.781n ± 0% 1.766n ± 1% -0.84% (p=0.011 n=10)
LeadingZeros16 1.782n ± 0% 1.767n ± 0% -0.79% (p=0.001 n=10)
LeadingZeros32 1.341n ± 1% 1.333n ± 0% -0.52% (p=0.001 n=10)
LeadingZeros64 1.338n ± 0% 1.333n ± 0% -0.37% (p=0.008 n=10)
TrailingZeros 0.9025n ± 0% 0.8077n ± 0% -10.50% (p=0.000 n=10)
TrailingZeros8 1.056n ± 0% 1.089n ± 1% +3.17% (p=0.001 n=10)
TrailingZeros16 1.101n ± 0% 1.102n ± 0% +0.09% (p=0.011 n=10)
TrailingZeros32 0.9024n ± 1% 0.8083n ± 0% -10.43% (p=0.000 n=10)
TrailingZeros64 0.9028n ± 1% 0.8087n ± 0% -10.43% (p=0.000 n=10)
OnesCount 1.482n ± 1% 1.302n ± 0% -12.15% (p=0.000 n=10)
OnesCount8 1.206n ± 0% 1.207n ± 2% +0.12% (p=0.000 n=10)
OnesCount16 1.534n ± 0% 1.402n ± 0% -8.58% (p=0.000 n=10)
OnesCount32 1.531n ± 1% 1.302n ± 0% -14.99% (p=0.000 n=10)
OnesCount64 1.302n ± 0% 1.538n ± 1% +18.16% (p=0.000 n=10)
RotateLeft 0.8083n ± 0% 0.8087n ± 1% ~ (p=0.579 n=10)
RotateLeft8 1.310n ± 0% 1.323n ± 0% +0.95% (p=0.001 n=10)
RotateLeft16 1.149n ± 0% 1.165n ± 1% +1.35% (p=0.001 n=10)
RotateLeft32 0.8093n ± 0% 0.8105n ± 0% ~ (p=0.393 n=10)
RotateLeft64 0.8088n ± 0% 0.8090n ± 0% ~ (p=0.739 n=10)
Reverse 0.5109n ± 0% 0.5172n ± 1% +1.25% (p=0.000 n=10)
Reverse8 0.8010n ± 0% 0.8011n ± 0% +0.01% (p=0.000 n=10)
Reverse16 0.8010n ± 0% 0.8011n ± 0% +0.01% (p=0.002 n=10)
Reverse32 0.8010n ± 0% 0.8011n ± 0% +0.01% (p=0.000 n=10)
Reverse64 0.8010n ± 0% 0.8011n ± 0% +0.01% (p=0.005 n=10)
ReverseBytes 0.5122n ± 2% 0.5182n ± 1% ~ (p=0.060 n=10)
ReverseBytes16 0.8010n ± 0% 0.8011n ± 0% +0.01% (p=0.005 n=10)
ReverseBytes32 0.8010n ± 0% 0.8011n ± 0% +0.01% (p=0.005 n=10)
ReverseBytes64 0.8010n ± 0% 0.8011n ± 0% +0.01% (p=0.001 n=10)
Add 1.201n ± 4% 1.202n ± 0% +0.08% (p=0.028 n=10)
Add32 1.201n ± 0% 1.202n ± 2% +0.08% (p=0.014 n=10)
Add64 1.201n ± 1% 1.202n ± 0% +0.08% (p=0.025 n=10)
Add64multiple 1.902n ± 0% 1.913n ± 0% +0.55% (p=0.004 n=10)
Sub 1.201n ± 0% 1.202n ± 3% +0.08% (p=0.001 n=10)
Sub32 1.654n ± 0% 1.656n ± 1% ~ (p=0.117 n=10)
Sub64 1.201n ± 0% 1.202n ± 0% +0.08% (p=0.001 n=10)
Sub64multiple 2.180n ± 4% 2.159n ± 1% -0.96% (p=0.006 n=10)
Mul 0.9345n ± 0% 0.9346n ± 0% +0.01% (p=0.000 n=10)
Mul32 1.030n ± 0% 1.050n ± 1% +1.94% (p=0.000 n=10)
Mul64 0.9345n ± 0% 0.9346n ± 1% +0.01% (p=0.000 n=10)
Div 11.57n ± 1% 11.12n ± 0% -3.85% (p=0.000 n=10)
Div32 4.337n ± 1% 4.341n ± 1% ~ (p=0.286 n=10)
Div64 12.76n ± 0% 12.02n ± 3% -5.80% (p=0.000 n=10)
geomean 1.252n 1.235n -1.32%

Change-Id: Iec4cfd2b83bb0f946068c1d657369ff081d95b04
Reviewed-on: https://go-review.googlesource.com/c/go/+/628575
Reviewed-by: abner chenc
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Junyang Shao
Reviewed-by: David Chase
---
 .../compile/internal/ssa/_gen/LOONG64.rules | 124 ++-
 .../compile/internal/ssa/rewriteLOONG64.go  | 968 ++++++++++++++++++
 test/codegen/shift.go                       |  16 +
 3 files changed, 1060 insertions(+), 48 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
index c94e1e19e2..7fc40577ba 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -57,65 +57,84 @@
 // shifts
 // hardware instruction uses only the low 6 bits of the shift
 // we compare to 64 to ensure Go semantics for large shifts
-(Lsh64x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y))
-(Lsh64x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y)))
-(Lsh64x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y)))
-(Lsh64x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y)))
-(Lsh32x64 x y) => (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y))
-(Lsh32x32 x y) => (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y)))
-(Lsh32x16 x y) => (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU
(MOVVconst [32]) (ZeroExt16to64 y))) -(Lsh32x8 x y) => (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) +// left shift +(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y) +(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SLL x y) +(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y) +(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y) -(Lsh16x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) -(Lsh16x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) -(Lsh16x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) -(Lsh16x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +(Lsh64x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) +(Lsh64x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +(Lsh64x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +(Lsh64x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) -(Lsh8x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) -(Lsh8x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) -(Lsh8x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) -(Lsh8x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +(Lsh32x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y)) +(Lsh32x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) +(Lsh32x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) +(Lsh32x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) -(Rsh64Ux64 x y) => (MASKEQZ (SRLV x y) (SGTU (MOVVconst [64]) y)) -(Rsh64Ux32 x y) => (MASKEQZ (SRLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) -(Rsh64Ux16 x y) => (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) -(Rsh64Ux8 x y) => (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +(Lsh16x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) +(Lsh16x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +(Lsh16x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +(Lsh16x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) -(Rsh32Ux64 x y) => (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) -(Rsh32Ux32 x y) => (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) -(Rsh32Ux16 x y) => (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) -(Rsh32Ux8 x y) => (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) +(Lsh8x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) +(Lsh8x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +(Lsh8x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +(Lsh8x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU 
(MOVVconst [64]) (ZeroExt8to64 y))) -(Rsh16Ux64 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y)) -(Rsh16Ux32 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) -(Rsh16Ux16 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) -(Rsh16Ux8 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +// unsigned right shift +(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV x y) +(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL x y) +(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV (ZeroExt16to64 x) y) +(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV (ZeroExt8to64 x) y) -(Rsh8Ux64 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) y) (SGTU (MOVVconst [64]) y)) -(Rsh8Ux32 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) -(Rsh8Ux16 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) -(Rsh8Ux8 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +(Rsh64Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x y) (SGTU (MOVVconst [64]) y)) +(Rsh64Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +(Rsh64Ux16 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +(Rsh64Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) -(Rsh64x64 x y) => (SRAV x (OR (NEGV (SGTU y (MOVVconst [63]))) y)) -(Rsh64x32 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) -(Rsh64x16 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) -(Rsh64x8 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) +(Rsh32Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) +(Rsh32Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) +(Rsh32Ux16 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) +(Rsh32Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) -(Rsh32x64 x y) => (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) -(Rsh32x32 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) -(Rsh32x16 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) -(Rsh32x8 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) +(Rsh16Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y)) +(Rsh16Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +(Rsh16Ux16 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +(Rsh16Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) -(Rsh16x64 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) -(Rsh16x32 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) -(Rsh16x16 x y) => (SRAV (SignExt16to64 
x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) -(Rsh16x8 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) +(Rsh8Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt8to64 x) y) (SGTU (MOVVconst [64]) y)) +(Rsh8Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +(Rsh8Ux16 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +(Rsh8Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) -(Rsh8x64 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) -(Rsh8x32 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) -(Rsh8x16 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) -(Rsh8x8 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) +// signed right shift +(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV x y) +(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA x y) +(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV (SignExt16to64 x) y) +(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV (SignExt8to64 x) y) + +(Rsh64x64 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU y (MOVVconst [63]))) y)) +(Rsh64x32 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +(Rsh64x16 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) +(Rsh64x8 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) + +(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) +(Rsh32x32 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) +(Rsh32x16 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) +(Rsh32x8 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) + +(Rsh16x64 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) +(Rsh16x32 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +(Rsh16x16 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) +(Rsh16x8 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) + +(Rsh8x64 x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) +(Rsh8x32 x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +(Rsh8x16 x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) +(Rsh8x8 x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) // bitfield ops @@ -700,6 +719,15 @@ (ROTR x (MOVVconst [c])) => (ROTRconst x [c&31]) (ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63]) +// SLLV/SRLV/SRAV only considers the bottom 6 bits of y, similarly 
SLL/SRL/SRA only considers the +// bottom 5 bits of y. +(SLL x (ANDconst [31] y)) => (SLL x y) +(SRL x (ANDconst [31] y)) => (SRL x y) +(SRA x (ANDconst [31] y)) => (SRA x y) +(SLLV x (ANDconst [63] y)) => (SLLV x y) +(SRLV x (ANDconst [63] y)) => (SRLV x y) +(SRAV x (ANDconst [63] y)) => (SRAV x y) + // Avoid unnecessary zero and sign extension when right shifting. (SRLVconst [rc] (MOVWUreg y)) && rc >= 0 && rc <= 31 => (SRLconst [int64(rc)] y) (SRAVconst [rc] (MOVWreg y)) && rc >= 0 && rc <= 31 => (SRAconst [int64(rc)] y) diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index d3cdf8b9bc..ea6aee8250 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -6072,6 +6072,18 @@ func rewriteValueLOONG64_OpLOONG64SLL(v *Value) bool { v.AddArg(x) return true } + // match: (SLL x (ANDconst [31] y)) + // result: (SLL x y) + for { + x := v_0 + if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 31 { + break + } + y := v_1.Args[0] + v.reset(OpLOONG64SLL) + v.AddArg2(x, y) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SLLV(v *Value) bool { @@ -6105,6 +6117,18 @@ func rewriteValueLOONG64_OpLOONG64SLLV(v *Value) bool { v.AddArg(x) return true } + // match: (SLLV x (ANDconst [63] y)) + // result: (SLLV x y) + for { + x := v_0 + if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 { + break + } + y := v_1.Args[0] + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SLLVconst(v *Value) bool { @@ -6160,6 +6184,18 @@ func rewriteValueLOONG64_OpLOONG64SRA(v *Value) bool { v.AddArg(x) return true } + // match: (SRA x (ANDconst [31] y)) + // result: (SRA x y) + for { + x := v_0 + if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 31 { + break + } + y := v_1.Args[0] + v.reset(OpLOONG64SRA) + v.AddArg2(x, y) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { @@ -6195,6 +6231,18 @@ func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { v.AddArg(x) return true } + // match: (SRAV x (ANDconst [63] y)) + // result: (SRAV x y) + for { + x := v_0 + if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 { + break + } + y := v_1.Args[0] + v.reset(OpLOONG64SRAV) + v.AddArg2(x, y) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool { @@ -6325,6 +6373,18 @@ func rewriteValueLOONG64_OpLOONG64SRL(v *Value) bool { v.AddArg(x) return true } + // match: (SRL x (ANDconst [31] y)) + // result: (SRL x y) + for { + x := v_0 + if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 31 { + break + } + y := v_1.Args[0] + v.reset(OpLOONG64SRL) + v.AddArg2(x, y) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SRLV(v *Value) bool { @@ -6358,6 +6418,18 @@ func rewriteValueLOONG64_OpLOONG64SRLV(v *Value) bool { v.AddArg(x) return true } + // match: (SRLV x (ANDconst [63] y)) + // result: (SRLV x y) + for { + x := v_0 + if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 { + break + } + y := v_1.Args[0] + v.reset(OpLOONG64SRLV) + v.AddArg2(x, y) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SRLVconst(v *Value) bool { @@ -7458,12 +7530,29 @@ func rewriteValueLOONG64_OpLsh16x16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh16x16 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if 
!(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh16x16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -7476,18 +7565,36 @@ func rewriteValueLOONG64_OpLsh16x16(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh16x32 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh16x32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) @@ -7500,18 +7607,36 @@ func rewriteValueLOONG64_OpLsh16x32(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh16x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh16x64 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh16x64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v0.AddArg2(x, y) @@ -7522,18 +7647,36 @@ func rewriteValueLOONG64_OpLsh16x64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpLsh16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh16x8 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh16x8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -7546,18 +7689,36 @@ func rewriteValueLOONG64_OpLsh16x8(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh32x16 x y) + // cond: shiftIsBounded(v) + // result: (SLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLL) + v.AddArg2(x, y) + return true + } // match: (Lsh32x16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v1 := 
b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -7570,18 +7731,36 @@ func rewriteValueLOONG64_OpLsh32x16(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh32x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh32x32 x y) + // cond: shiftIsBounded(v) + // result: (SLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLL) + v.AddArg2(x, y) + return true + } // match: (Lsh32x32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) @@ -7594,18 +7773,36 @@ func rewriteValueLOONG64_OpLsh32x32(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh32x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh32x64 x y) + // cond: shiftIsBounded(v) + // result: (SLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLL) + v.AddArg2(x, y) + return true + } // match: (Lsh32x64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v0.AddArg2(x, y) @@ -7616,18 +7813,36 @@ func rewriteValueLOONG64_OpLsh32x64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpLsh32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh32x8 x y) + // cond: shiftIsBounded(v) + // result: (SLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLL) + v.AddArg2(x, y) + return true + } // match: (Lsh32x8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -7640,18 +7855,36 @@ func rewriteValueLOONG64_OpLsh32x8(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh64x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh64x16 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh64x16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -7664,18 +7897,36 @@ func rewriteValueLOONG64_OpLsh64x16(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh64x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh64x32 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + 
y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh64x32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) @@ -7688,18 +7939,36 @@ func rewriteValueLOONG64_OpLsh64x32(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh64x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh64x64 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh64x64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v0.AddArg2(x, y) @@ -7710,18 +7979,36 @@ func rewriteValueLOONG64_OpLsh64x64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpLsh64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh64x8 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh64x8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -7734,18 +8021,36 @@ func rewriteValueLOONG64_OpLsh64x8(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh8x16 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh8x16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -7758,18 +8063,36 @@ func rewriteValueLOONG64_OpLsh8x16(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh8x32 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh8x32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, 
t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) @@ -7782,18 +8105,36 @@ func rewriteValueLOONG64_OpLsh8x32(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpLsh8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh8x64 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh8x64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v0.AddArg2(x, y) @@ -7804,18 +8145,36 @@ func rewriteValueLOONG64_OpLsh8x64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpLsh8x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Lsh8x8 x y) + // cond: shiftIsBounded(v) + // result: (SLLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SLLV) + v.AddArg2(x, y) + return true + } // match: (Lsh8x8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -7828,6 +8187,7 @@ func rewriteValueLOONG64_OpLsh8x8(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpMod16(v *Value) bool { v_1 := v.Args[1] @@ -8771,12 +9131,31 @@ func rewriteValueLOONG64_OpRsh16Ux16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16Ux16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -8791,18 +9170,38 @@ func rewriteValueLOONG64_OpRsh16Ux16(v *Value) bool { v.AddArg2(v0, v3) return true } + return false } func rewriteValueLOONG64_OpRsh16Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16Ux32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -8817,18 +9216,38 @@ func 
rewriteValueLOONG64_OpRsh16Ux32(v *Value) bool { v.AddArg2(v0, v3) return true } + return false } func rewriteValueLOONG64_OpRsh16Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16Ux64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -8841,18 +9260,38 @@ func rewriteValueLOONG64_OpRsh16Ux64(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh16Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16Ux8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -8867,18 +9306,38 @@ func rewriteValueLOONG64_OpRsh16Ux8(v *Value) bool { v.AddArg2(v0, v3) return true } + return false } func rewriteValueLOONG64_OpRsh16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16x16 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16x16 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) v0.AddArg(x) @@ -8895,18 +9354,38 @@ func rewriteValueLOONG64_OpRsh16x16(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16x32 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16x32 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) v0.AddArg(x) @@ -8923,18 +9402,38 
@@ func rewriteValueLOONG64_OpRsh16x32(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh16x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16x64 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16x64 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) v0.AddArg(x) @@ -8949,18 +9448,38 @@ func rewriteValueLOONG64_OpRsh16x64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh16x8 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh16x8 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) v0.AddArg(x) @@ -8977,18 +9496,36 @@ func rewriteValueLOONG64_OpRsh16x8(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh32Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh32Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRL) + v.AddArg2(x, y) + return true + } // match: (Rsh32Ux16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -9001,18 +9538,36 @@ func rewriteValueLOONG64_OpRsh32Ux16(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh32Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh32Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRL) + v.AddArg2(x, y) + return true + } // match: (Rsh32Ux32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) @@ -9025,18 +9580,36 @@ func rewriteValueLOONG64_OpRsh32Ux32(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh32Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ 
:= &b.Func.Config.Types + // match: (Rsh32Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRL) + v.AddArg2(x, y) + return true + } // match: (Rsh32Ux64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) v0.AddArg2(x, y) @@ -9047,18 +9620,36 @@ func rewriteValueLOONG64_OpRsh32Ux64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh32Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh32Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRL) + v.AddArg2(x, y) + return true + } // match: (Rsh32Ux8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -9071,18 +9662,36 @@ func rewriteValueLOONG64_OpRsh32Ux8(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh32x16 x y) + // cond: shiftIsBounded(v) + // result: (SRA x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRA) + v.AddArg2(x, y) + return true + } // match: (Rsh32x16 x y) + // cond: !shiftIsBounded(v) // result: (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRA) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9097,18 +9706,36 @@ func rewriteValueLOONG64_OpRsh32x16(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh32x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh32x32 x y) + // cond: shiftIsBounded(v) + // result: (SRA x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRA) + v.AddArg2(x, y) + return true + } // match: (Rsh32x32 x y) + // cond: !shiftIsBounded(v) // result: (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRA) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9123,18 +9750,36 @@ func rewriteValueLOONG64_OpRsh32x32(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh32x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh32x64 x y) + // cond: shiftIsBounded(v) + // result: (SRA x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRA) + v.AddArg2(x, y) + return true + } // match: (Rsh32x64 x y) + // cond: !shiftIsBounded(v) // result: (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } 
v.reset(OpLOONG64SRA) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9147,18 +9792,36 @@ func rewriteValueLOONG64_OpRsh32x64(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh32x8 x y) + // cond: shiftIsBounded(v) + // result: (SRA x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRA) + v.AddArg2(x, y) + return true + } // match: (Rsh32x8 x y) + // cond: !shiftIsBounded(v) // result: (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRA) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9173,18 +9836,36 @@ func rewriteValueLOONG64_OpRsh32x8(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh64Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SRLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v.AddArg2(x, y) + return true + } // match: (Rsh64Ux16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) @@ -9197,18 +9878,36 @@ func rewriteValueLOONG64_OpRsh64Ux16(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh64Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SRLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v.AddArg2(x, y) + return true + } // match: (Rsh64Ux32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) @@ -9221,18 +9920,36 @@ func rewriteValueLOONG64_OpRsh64Ux32(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh64Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SRLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v.AddArg2(x, y) + return true + } // match: (Rsh64Ux64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV x y) (SGTU (MOVVconst [64]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v0.AddArg2(x, y) @@ -9243,18 +9960,36 @@ func rewriteValueLOONG64_OpRsh64Ux64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh64Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64Ux8 x y) + // cond: 
shiftIsBounded(v) + // result: (SRLV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v.AddArg2(x, y) + return true + } // match: (Rsh64Ux8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -9267,18 +10002,36 @@ func rewriteValueLOONG64_OpRsh64Ux8(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh64x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64x16 x y) + // cond: shiftIsBounded(v) + // result: (SRAV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v.AddArg2(x, y) + return true + } // match: (Rsh64x16 x y) + // cond: !shiftIsBounded(v) // result: (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9293,18 +10046,36 @@ func rewriteValueLOONG64_OpRsh64x16(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh64x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64x32 x y) + // cond: shiftIsBounded(v) + // result: (SRAV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v.AddArg2(x, y) + return true + } // match: (Rsh64x32 x y) + // cond: !shiftIsBounded(v) // result: (SRAV x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9319,18 +10090,36 @@ func rewriteValueLOONG64_OpRsh64x32(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh64x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64x64 x y) + // cond: shiftIsBounded(v) + // result: (SRAV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v.AddArg2(x, y) + return true + } // match: (Rsh64x64 x y) + // cond: !shiftIsBounded(v) // result: (SRAV x (OR (NEGV (SGTU y (MOVVconst [63]))) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9343,18 +10132,36 @@ func rewriteValueLOONG64_OpRsh64x64(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh64x8 x y) + // cond: shiftIsBounded(v) + // result: (SRAV x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v.AddArg2(x, y) + return true + } // match: (Rsh64x8 x y) + // cond: !shiftIsBounded(v) // result: (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + 
} v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) @@ -9369,18 +10176,38 @@ func rewriteValueLOONG64_OpRsh64x8(v *Value) bool { v.AddArg2(x, v0) return true } + return false } func rewriteValueLOONG64_OpRsh8Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8Ux16 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -9395,18 +10222,38 @@ func rewriteValueLOONG64_OpRsh8Ux16(v *Value) bool { v.AddArg2(v0, v3) return true } + return false } func rewriteValueLOONG64_OpRsh8Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8Ux32 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -9421,18 +10268,38 @@ func rewriteValueLOONG64_OpRsh8Ux32(v *Value) bool { v.AddArg2(v0, v3) return true } + return false } func rewriteValueLOONG64_OpRsh8Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8Ux64 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt8to64 x) y) (SGTU (MOVVconst [64]) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -9445,18 +10312,38 @@ func rewriteValueLOONG64_OpRsh8Ux64(v *Value) bool { v.AddArg2(v0, v2) return true } + return false } func rewriteValueLOONG64_OpRsh8Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SRLV (ZeroExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRLV) + v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8Ux8 x y) + // cond: !shiftIsBounded(v) // result: (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + 
if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64MASKEQZ) v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) @@ -9471,18 +10358,38 @@ func rewriteValueLOONG64_OpRsh8Ux8(v *Value) bool { v.AddArg2(v0, v3) return true } + return false } func rewriteValueLOONG64_OpRsh8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8x16 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8x16 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) v0.AddArg(x) @@ -9499,18 +10406,38 @@ func rewriteValueLOONG64_OpRsh8x16(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8x32 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8x32 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) v0.AddArg(x) @@ -9527,18 +10454,38 @@ func rewriteValueLOONG64_OpRsh8x32(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8x64 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8x64 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) v0.AddArg(x) @@ -9553,18 +10500,38 @@ func rewriteValueLOONG64_OpRsh8x64(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpRsh8x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Rsh8x8 x y) + // cond: shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) + v.AddArg2(v0, y) + return true + } // match: (Rsh8x8 x y) + // cond: !shiftIsBounded(v) // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) for { t := v.Type x := v_0 y := v_1 + if !(!shiftIsBounded(v)) { + break + } v.reset(OpLOONG64SRAV) 
v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) v0.AddArg(x) @@ -9581,6 +10548,7 @@ func rewriteValueLOONG64_OpRsh8x8(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueLOONG64_OpSelect0(v *Value) bool { v_0 := v.Args[0] diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 8254e974df..02842a5739 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -145,6 +145,7 @@ func lshConst64x2Add(x int64) int64 { func lshMask64x64(v int64, s uint64) int64 { // arm64:"LSL",-"AND" + // loong64:"SLLV",-"AND" // ppc64x:"RLDICL",-"ORN",-"ISEL" // riscv64:"SLL",-"AND\t",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" @@ -153,6 +154,7 @@ func lshMask64x64(v int64, s uint64) int64 { func rshMask64Ux64(v uint64, s uint64) uint64 { // arm64:"LSR",-"AND",-"CSEL" + // loong64:"SRLV",-"AND" // ppc64x:"RLDICL",-"ORN",-"ISEL" // riscv64:"SRL\t",-"AND\t",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" @@ -161,6 +163,7 @@ func rshMask64Ux64(v uint64, s uint64) uint64 { func rshMask64x64(v int64, s uint64) int64 { // arm64:"ASR",-"AND",-"CSEL" + // loong64:"SRAV",-"AND" // ppc64x:"RLDICL",-"ORN",-"ISEL" // riscv64:"SRA\t",-"OR",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" @@ -169,14 +172,21 @@ func rshMask64x64(v int64, s uint64) int64 { func lshMask32x64(v int32, s uint64) int32 { // arm64:"LSL",-"AND" + // loong64:"SLL\t","AND","SGTU","MASKEQZ" // ppc64x:"ISEL",-"ORN" // riscv64:"SLL",-"AND\t",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" return v << (s & 63) } +func lsh5Mask32x64(v int32, s uint64) int32 { + // loong64:"SLL\t",-"AND" + return v << (s & 31) +} + func rshMask32Ux64(v uint32, s uint64) uint32 { // arm64:"LSR",-"AND" + // loong64:"SRL\t","AND","SGTU","MASKEQZ" // ppc64x:"ISEL",-"ORN" // riscv64:"SRLW","SLTIU","NEG","AND\t",-"SRL\t" // s390x:-"RISBGZ",-"AND",-"LOCGR" @@ -184,12 +194,14 @@ func rshMask32Ux64(v uint32, s uint64) uint32 { } func rsh5Mask32Ux64(v uint32, s uint64) uint32 { + // loong64:"SRL\t",-"AND" // riscv64:"SRLW",-"AND\t",-"SLTIU",-"SRL\t" return v >> (s & 31) } func rshMask32x64(v int32, s uint64) int32 { // arm64:"ASR",-"AND" + // loong64:"SRA\t","AND","SGTU","SUBVU","OR" // ppc64x:"ISEL",-"ORN" // riscv64:"SRAW","OR","SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" @@ -197,12 +209,14 @@ func rshMask32x64(v int32, s uint64) int32 { } func rsh5Mask32x64(v int32, s uint64) int32 { + // loong64:"SRA\t",-"AND" // riscv64:"SRAW",-"OR",-"SLTIU" return v >> (s & 31) } func lshMask64x32(v int64, s uint32) int64 { // arm64:"LSL",-"AND" + // loong64:"SLLV",-"AND" // ppc64x:"RLDICL",-"ORN" // riscv64:"SLL",-"AND\t",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" @@ -211,6 +225,7 @@ func lshMask64x32(v int64, s uint32) int64 { func rshMask64Ux32(v uint64, s uint32) uint64 { // arm64:"LSR",-"AND",-"CSEL" + // loong64:"SRLV",-"AND" // ppc64x:"RLDICL",-"ORN" // riscv64:"SRL\t",-"AND\t",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR" @@ -219,6 +234,7 @@ func rshMask64Ux32(v uint64, s uint32) uint64 { func rshMask64x32(v int64, s uint32) int64 { // arm64:"ASR",-"AND",-"CSEL" + // loong64:"SRAV",-"AND" // ppc64x:"RLDICL",-"ORN",-"ISEL" // riscv64:"SRA\t",-"OR",-"SLTIU" // s390x:-"RISBGZ",-"AND",-"LOCGR"
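For illustration only (not part of the CL): a minimal, hypothetical Go sketch of the pattern the codegen tests above exercise. Masking the shift amount with 63 (or 31 for 32-bit operands) lets shiftIsBounded prove the shift is in range, so with this change loong64 can emit a bare SLLV/SRL instead of the MASKEQZ/SGTU guard sequence, and the new (SLL|SRL|SRA|SLLV|SRLV|SRAV x (ANDconst ...)) rules also drop the mask itself. The function names below are made up for the example; the comments reference the real tests in test/codegen/shift.go.

package main

import "fmt"

// lshBounded mirrors the lshMask64x64 codegen test: masking the shift amount
// with 63 lets shiftIsBounded prove it is < 64, so on loong64 this should now
// compile to a single SLLV with no MASKEQZ/SGTU guard.
func lshBounded(v int64, s uint64) int64 {
	return v << (s & 63)
}

// rsh5Bounded mirrors rsh5Mask32Ux64: masking with 31 bounds the shift for a
// 32-bit operand, and the new (SRL x (ANDconst [31] y)) rule drops the AND as
// well, leaving a bare SRL.
func rsh5Bounded(v uint32, s uint64) uint32 {
	return v >> (s & 31)
}

func main() {
	fmt.Println(lshBounded(1, 200))    // 200&63 == 8, prints 256
	fmt.Println(rsh5Bounded(1024, 42)) // 42&31 == 10, prints 1
}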