cmd/compile: simplify bounded shift on loong64
Use the shiftIsBounded function to generate more efficient shift
instructions. This change also optimizes shift ops when the shift
value is v&63 or v&31.

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000-HV @ 2500.00MHz
                 |  CL 627855   |                this CL                |
                 |    sec/op    |    sec/op      vs base                |
LeadingZeros       1.1005n ± 0%    0.8425n ± 1%  -23.44% (p=0.000 n=10)
LeadingZeros8       1.502n ± 0%     1.501n ± 0%   -0.07% (p=0.001 n=10)
LeadingZeros16      1.502n ± 0%     1.501n ± 0%   -0.07% (p=0.000 n=10)
LeadingZeros32     0.9511n ± 0%    0.8050n ± 0%  -15.36% (p=0.000 n=10)
LeadingZeros64     1.1195n ± 0%    0.8423n ± 0%  -24.76% (p=0.000 n=10)
TrailingZeros      0.8086n ± 0%    0.8005n ± 0%   -1.00% (p=0.000 n=10)
TrailingZeros8      1.031n ± 1%     1.035n ± 1%        ~ (p=0.136 n=10)
TrailingZeros16    0.8114n ± 0%    0.8254n ± 1%   +1.73% (p=0.000 n=10)
TrailingZeros32    0.8090n ± 0%    0.8005n ± 0%   -1.05% (p=0.000 n=10)
TrailingZeros64    0.8089n ± 1%    0.8005n ± 0%   -1.04% (p=0.000 n=10)
OnesCount          0.8677n ± 0%    1.2010n ± 0%  +38.41% (p=0.000 n=10)
OnesCount8         0.8009n ± 0%    0.8004n ± 0%   -0.06% (p=0.000 n=10)
OnesCount16        0.9344n ± 0%    1.2010n ± 0%  +28.53% (p=0.000 n=10)
OnesCount32        0.8677n ± 0%    1.2010n ± 0%  +38.41% (p=0.000 n=10)
OnesCount64        1.2010n ± 0%    0.8671n ± 0%  -27.80% (p=0.000 n=10)
RotateLeft         0.8009n ± 0%    0.6671n ± 0%  -16.71% (p=0.000 n=10)
RotateLeft8         1.202n ± 0%     1.327n ± 0%  +10.40% (p=0.000 n=10)
RotateLeft16       0.8036n ± 0%    0.8218n ± 0%   +2.26% (p=0.000 n=10)
RotateLeft32       0.6674n ± 0%    0.8004n ± 0%  +19.94% (p=0.000 n=10)
RotateLeft64       0.6674n ± 0%    0.8004n ± 0%  +19.94% (p=0.000 n=10)
Reverse            0.4067n ± 1%    0.4122n ± 1%   +1.38% (p=0.001 n=10)
Reverse8           0.8009n ± 0%    0.8004n ± 0%   -0.06% (p=0.000 n=10)
Reverse16          0.8009n ± 0%    0.8005n ± 0%   -0.05% (p=0.000 n=10)
Reverse32          0.8009n ± 0%    0.8004n ± 0%   -0.06% (p=0.001 n=10)
Reverse64          0.8009n ± 0%    0.8004n ± 0%   -0.06% (p=0.008 n=10)
ReverseBytes       0.4057n ± 1%    0.4133n ± 1%   +1.90% (p=0.000 n=10)
ReverseBytes16     0.8009n ± 0%    0.8004n ± 0%   -0.07% (p=0.000 n=10)
ReverseBytes32     0.8009n ± 0%    0.8005n ± 0%   -0.05% (p=0.000 n=10)
ReverseBytes64     0.8009n ± 0%    0.8004n ± 0%   -0.06% (p=0.000 n=10)
Add                 1.201n ± 0%     1.201n ± 0%        ~ (p=1.000 n=10)
Add32               1.201n ± 0%     1.201n ± 0%        ~ (p=0.474 n=10)
Add64               1.201n ± 0%     1.201n ± 0%        ~ (p=1.000 n=10)
Add64multiple       1.832n ± 0%     1.828n ± 0%   -0.22% (p=0.001 n=10)
Sub                 1.201n ± 0%     1.201n ± 0%        ~ (p=1.000 n=10)
Sub32               1.602n ± 0%     1.601n ± 0%   -0.06% (p=0.000 n=10)
Sub64               1.201n ± 0%     1.201n ± 0%        ~ (p=0.474 n=10)
Sub64multiple       2.402n ± 0%     2.400n ± 0%   -0.10% (p=0.000 n=10)
Mul                0.8009n ± 0%    0.8004n ± 0%   -0.06% (p=0.000 n=10)
Mul32              0.8009n ± 0%    0.8004n ± 0%   -0.06% (p=0.000 n=10)
Mul64              0.8008n ± 0%    0.8004n ± 0%   -0.05% (p=0.000 n=10)
Div                 9.083n ± 0%     7.638n ± 0%  -15.91% (p=0.000 n=10)
Div32               4.011n ± 0%     4.009n ± 0%   -0.05% (p=0.000 n=10)
Div64               9.711n ± 0%     8.204n ± 0%  -15.51% (p=0.000 n=10)
geomean             1.083n          1.078n        -0.40%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
                 |  CL 627855   |                this CL                |
                 |    sec/op    |    sec/op      vs base                |
LeadingZeros        1.341n ± 4%     1.331n ± 2%   -0.71% (p=0.008 n=10)
LeadingZeros8       1.781n ± 0%     1.766n ± 1%   -0.84% (p=0.011 n=10)
LeadingZeros16      1.782n ± 0%     1.767n ± 0%   -0.79% (p=0.001 n=10)
LeadingZeros32      1.341n ± 1%     1.333n ± 0%   -0.52% (p=0.001 n=10)
LeadingZeros64      1.338n ± 0%     1.333n ± 0%   -0.37% (p=0.008 n=10)
TrailingZeros      0.9025n ± 0%    0.8077n ± 0%  -10.50% (p=0.000 n=10)
TrailingZeros8      1.056n ± 0%     1.089n ± 1%   +3.17% (p=0.001 n=10)
TrailingZeros16     1.101n ± 0%     1.102n ± 0%   +0.09% (p=0.011 n=10)
TrailingZeros32    0.9024n ± 1%    0.8083n ± 0%  -10.43% (p=0.000 n=10)
TrailingZeros64    0.9028n ± 1%    0.8087n ± 0%  -10.43% (p=0.000 n=10)
OnesCount           1.482n ± 1%     1.302n ± 0%  -12.15% (p=0.000 n=10)
OnesCount8          1.206n ± 0%     1.207n ± 2%   +0.12% (p=0.000 n=10)
OnesCount16         1.534n ± 0%     1.402n ± 0%   -8.58% (p=0.000 n=10)
OnesCount32         1.531n ± 1%     1.302n ± 0%  -14.99% (p=0.000 n=10)
OnesCount64         1.302n ± 0%     1.538n ± 1%  +18.16% (p=0.000 n=10)
RotateLeft         0.8083n ± 0%    0.8087n ± 1%        ~ (p=0.579 n=10)
RotateLeft8         1.310n ± 0%     1.323n ± 0%   +0.95% (p=0.001 n=10)
RotateLeft16        1.149n ± 0%     1.165n ± 1%   +1.35% (p=0.001 n=10)
RotateLeft32       0.8093n ± 0%    0.8105n ± 0%        ~ (p=0.393 n=10)
RotateLeft64       0.8088n ± 0%    0.8090n ± 0%        ~ (p=0.739 n=10)
Reverse            0.5109n ± 0%    0.5172n ± 1%   +1.25% (p=0.000 n=10)
Reverse8           0.8010n ± 0%    0.8011n ± 0%   +0.01% (p=0.000 n=10)
Reverse16          0.8010n ± 0%    0.8011n ± 0%   +0.01% (p=0.002 n=10)
Reverse32          0.8010n ± 0%    0.8011n ± 0%   +0.01% (p=0.000 n=10)
Reverse64          0.8010n ± 0%    0.8011n ± 0%   +0.01% (p=0.005 n=10)
ReverseBytes       0.5122n ± 2%    0.5182n ± 1%        ~ (p=0.060 n=10)
ReverseBytes16     0.8010n ± 0%    0.8011n ± 0%   +0.01% (p=0.005 n=10)
ReverseBytes32     0.8010n ± 0%    0.8011n ± 0%   +0.01% (p=0.005 n=10)
ReverseBytes64     0.8010n ± 0%    0.8011n ± 0%   +0.01% (p=0.001 n=10)
Add                 1.201n ± 4%     1.202n ± 0%   +0.08% (p=0.028 n=10)
Add32               1.201n ± 0%     1.202n ± 2%   +0.08% (p=0.014 n=10)
Add64               1.201n ± 1%     1.202n ± 0%   +0.08% (p=0.025 n=10)
Add64multiple       1.902n ± 0%     1.913n ± 0%   +0.55% (p=0.004 n=10)
Sub                 1.201n ± 0%     1.202n ± 3%   +0.08% (p=0.001 n=10)
Sub32               1.654n ± 0%     1.656n ± 1%        ~ (p=0.117 n=10)
Sub64               1.201n ± 0%     1.202n ± 0%   +0.08% (p=0.001 n=10)
Sub64multiple       2.180n ± 4%     2.159n ± 1%   -0.96% (p=0.006 n=10)
Mul                0.9345n ± 0%    0.9346n ± 0%   +0.01% (p=0.000 n=10)
Mul32               1.030n ± 0%     1.050n ± 1%   +1.94% (p=0.000 n=10)
Mul64              0.9345n ± 0%    0.9346n ± 1%   +0.01% (p=0.000 n=10)
Div                 11.57n ± 1%     11.12n ± 0%   -3.85% (p=0.000 n=10)
Div32               4.337n ± 1%     4.341n ± 1%        ~ (p=0.286 n=10)
Div64               12.76n ± 0%     12.02n ± 3%   -5.80% (p=0.000 n=10)
geomean             1.252n          1.235n        -1.32%

Change-Id: Iec4cfd2b83bb0f946068c1d657369ff081d95b04
Reviewed-on: https://go-review.googlesource.com/c/go/+/628575
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: David Chase <drchase@google.com>
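For context: shiftIsBounded(v) reports that the shift amount is statically known to be less than the width of the shifted value, so the wrap-around guard that Go's shift semantics otherwise require can be dropped. A minimal Go sketch of the two cases these rules distinguish (function names are illustrative, not from the CL):

// Bounded: s&63 is provably < 64, so on loong64 this can become a bare
// SLLV with no MASKEQZ/SGTU guard (and, per the new ANDconst rules
// below, no AND either).
func lshBounded(v int64, s uint64) int64 {
	return v << (s & 63)
}

// Unbounded: s may be >= 64, where Go defines the result to be 0, so
// the lowering must keep the guard sequence.
func lshUnbounded(v int64, s uint64) int64 {
	return v << s
}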
parent b10c35945d
commit b143c98169
@@ -57,65 +57,84 @@
 // shifts
 // hardware instruction uses only the low 6 bits of the shift
 // we compare to 64 to ensure Go semantics for large shifts
-(Lsh64x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
-(Lsh64x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
-(Lsh64x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
-(Lsh64x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))

-(Lsh32x64 <t> x y) => (MASKEQZ (SLL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y))
-(Lsh32x32 <t> x y) => (MASKEQZ (SLL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y)))
-(Lsh32x16 <t> x y) => (MASKEQZ (SLL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y)))
-(Lsh32x8 <t> x y) => (MASKEQZ (SLL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y)))
+// left shift
+(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y)
+(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SLL x y)
+(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y)
+(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y)

-(Lsh16x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
-(Lsh16x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
-(Lsh16x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
-(Lsh16x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))
+(Lsh64x64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
+(Lsh64x32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
+(Lsh64x16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
+(Lsh64x8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))

-(Lsh8x64 <t> x y) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
-(Lsh8x32 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
-(Lsh8x16 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
-(Lsh8x8 <t> x y) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))
+(Lsh32x64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y))
+(Lsh32x32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y)))
+(Lsh32x16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y)))
+(Lsh32x8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y)))

-(Rsh64Ux64 <t> x y) => (MASKEQZ (SRLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
-(Rsh64Ux32 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
-(Rsh64Ux16 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
-(Rsh64Ux8 <t> x y) => (MASKEQZ (SRLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))
+(Lsh16x64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
+(Lsh16x32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
+(Lsh16x16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
+(Lsh16x8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))

-(Rsh32Ux64 <t> x y) => (MASKEQZ (SRL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y))
-(Rsh32Ux32 <t> x y) => (MASKEQZ (SRL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y)))
-(Rsh32Ux16 <t> x y) => (MASKEQZ (SRL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y)))
-(Rsh32Ux8 <t> x y) => (MASKEQZ (SRL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y)))
+(Lsh8x64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
+(Lsh8x32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
+(Lsh8x16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
+(Lsh8x8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))

-(Rsh16Ux64 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
-(Rsh16Ux32 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
-(Rsh16Ux16 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
-(Rsh16Ux8 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))
+// unsigned right shift
+(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV x y)
+(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL x y)
+(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV (ZeroExt16to64 x) y)
+(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV (ZeroExt8to64 x) y)

-(Rsh8Ux64 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
-(Rsh8Ux32 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
-(Rsh8Ux16 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
-(Rsh8Ux8 <t> x y) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))
+(Rsh64Ux64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
+(Rsh64Ux32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
+(Rsh64Ux16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
+(Rsh64Ux8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))

-(Rsh64x64 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
-(Rsh64x32 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
-(Rsh64x16 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
-(Rsh64x8 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
+(Rsh32Ux64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRL <t> x y) (SGTU (MOVVconst <typ.UInt64> [32]) y))
+(Rsh32Ux32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRL <t> x (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt32to64 y)))
+(Rsh32Ux16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRL <t> x (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt16to64 y)))
+(Rsh32Ux8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRL <t> x (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [32]) (ZeroExt8to64 y)))

-(Rsh32x64 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [31]))) y))
-(Rsh32x32 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt32to64 y)))
-(Rsh32x16 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt16to64 y)))
-(Rsh32x8 <t> x y) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt8to64 y)))
+(Rsh16Ux64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
+(Rsh16Ux32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
+(Rsh16Ux16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
+(Rsh16Ux8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))

-(Rsh16x64 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
-(Rsh16x32 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
-(Rsh16x16 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
-(Rsh16x8 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
+(Rsh8Ux64 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) y) (SGTU (MOVVconst <typ.UInt64> [64]) y))
+(Rsh8Ux32 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y)))
+(Rsh8Ux16 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y)))
+(Rsh8Ux8 <t> x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y)))

-(Rsh8x64 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
-(Rsh8x32 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
-(Rsh8x16 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
-(Rsh8x8 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
+// signed right shift
+(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV x y)
+(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA x y)
+(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV (SignExt16to64 x) y)
+(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV (SignExt8to64 x) y)

+(Rsh64x64 <t> x y) && !shiftIsBounded(v) => (SRAV x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
+(Rsh64x32 <t> x y) && !shiftIsBounded(v) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
+(Rsh64x16 <t> x y) && !shiftIsBounded(v) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
+(Rsh64x8 <t> x y) && !shiftIsBounded(v) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))

+(Rsh32x64 <t> x y) && !shiftIsBounded(v) => (SRA x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [31]))) y))
+(Rsh32x32 <t> x y) && !shiftIsBounded(v) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt32to64 y)))
+(Rsh32x16 <t> x y) && !shiftIsBounded(v) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt16to64 y)))
+(Rsh32x8 <t> x y) && !shiftIsBounded(v) => (SRA x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [31]))) (ZeroExt8to64 y)))

+(Rsh16x64 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
+(Rsh16x32 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
+(Rsh16x16 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
+(Rsh16x8 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))

+(Rsh8x64 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
+(Rsh8x32 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
+(Rsh8x16 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
+(Rsh8x8 <t> x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))

 // bitfield ops
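To make the !shiftIsBounded lowerings above concrete, here is a rough Go rendering of (MASKEQZ (SLLV <t> x y) (SGTU (MOVVconst <typ.UInt64> [64]) y)). It assumes the LoongArch semantics that SLLV uses only the low 6 bits of its shift operand and that MASKEQZ yields its first operand when the condition operand is nonzero and 0 otherwise; a sketch, not code from the CL:

func lsh64Unbounded(x int64, y uint64) int64 {
	shifted := x << (y & 63) // SLLV: only the low 6 bits of y are used
	if 64 > y {              // SGTU: unsigned compare yielding 1 or 0
		return shifted // MASKEQZ keeps the shifted value
	}
	return 0 // y >= 64: Go requires 0, MASKEQZ masks the result to zero
}

The guard is straight-line code (compare plus mask) rather than a branch; the bounded case is cheaper still because it needs neither.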
@@ -700,6 +719,15 @@
 (ROTR x (MOVVconst [c])) => (ROTRconst x [c&31])
 (ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63])

+// SLLV/SRLV/SRAV only considers the bottom 6 bits of y, similarly SLL/SRL/SRA only considers the
+// bottom 5 bits of y.
+(SLL x (ANDconst [31] y)) => (SLL x y)
+(SRL x (ANDconst [31] y)) => (SRL x y)
+(SRA x (ANDconst [31] y)) => (SRA x y)
+(SLLV x (ANDconst [63] y)) => (SLLV x y)
+(SRLV x (ANDconst [63] y)) => (SRLV x y)
+(SRAV x (ANDconst [63] y)) => (SRAV x y)
+
 // Avoid unnecessary zero and sign extension when right shifting.
 (SRLVconst [rc] (MOVWUreg y)) && rc >= 0 && rc <= 31 => (SRLconst [int64(rc)] y)
 (SRAVconst [rc] (MOVWreg y)) && rc >= 0 && rc <= 31 => (SRAconst [int64(rc)] y)
File diff suppressed because it is too large.
@@ -145,6 +145,7 @@ func lshConst64x2Add(x int64) int64 {

 func lshMask64x64(v int64, s uint64) int64 {
 	// arm64:"LSL",-"AND"
+	// loong64:"SLLV",-"AND"
 	// ppc64x:"RLDICL",-"ORN",-"ISEL"
 	// riscv64:"SLL",-"AND\t",-"SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
@@ -153,6 +154,7 @@ func lshMask64x64(v int64, s uint64) int64 {

 func rshMask64Ux64(v uint64, s uint64) uint64 {
 	// arm64:"LSR",-"AND",-"CSEL"
+	// loong64:"SRLV",-"AND"
 	// ppc64x:"RLDICL",-"ORN",-"ISEL"
 	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
@@ -161,6 +163,7 @@ func rshMask64Ux64(v uint64, s uint64) uint64 {

 func rshMask64x64(v int64, s uint64) int64 {
 	// arm64:"ASR",-"AND",-"CSEL"
+	// loong64:"SRAV",-"AND"
 	// ppc64x:"RLDICL",-"ORN",-"ISEL"
 	// riscv64:"SRA\t",-"OR",-"SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
@@ -169,14 +172,21 @@ func rshMask64x64(v int64, s uint64) int64 {

 func lshMask32x64(v int32, s uint64) int32 {
 	// arm64:"LSL",-"AND"
+	// loong64:"SLL\t","AND","SGTU","MASKEQZ"
 	// ppc64x:"ISEL",-"ORN"
 	// riscv64:"SLL",-"AND\t",-"SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
 	return v << (s & 63)
 }
+
+func lsh5Mask32x64(v int32, s uint64) int32 {
+	// loong64:"SLL\t",-"AND"
+	return v << (s & 31)
+}

 func rshMask32Ux64(v uint32, s uint64) uint32 {
 	// arm64:"LSR",-"AND"
+	// loong64:"SRL\t","AND","SGTU","MASKEQZ"
 	// ppc64x:"ISEL",-"ORN"
 	// riscv64:"SRLW","SLTIU","NEG","AND\t",-"SRL\t"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
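The pair lshMask32x64/lsh5Mask32x64 above pins down where boundedness begins for 32-bit operands: s&63 can still be 32 or more, so the guard has to stay, while s&31 cannot be. Condensed from the test functions, with the assertions paraphrased as comments:

// s&63 may be >= 32: the AND stays and SGTU/MASKEQZ guard the SLL.
func lshMask32x64(v int32, s uint64) int32 { return v << (s & 63) }

// s&31 is provably < 32: shiftIsBounded holds and a bare SLL suffices.
func lsh5Mask32x64(v int32, s uint64) int32 { return v << (s & 31) }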
@@ -184,12 +194,14 @@ func rshMask32Ux64(v uint32, s uint64) uint32 {
 }

 func rsh5Mask32Ux64(v uint32, s uint64) uint32 {
+	// loong64:"SRL\t",-"AND"
 	// riscv64:"SRLW",-"AND\t",-"SLTIU",-"SRL\t"
 	return v >> (s & 31)
 }

 func rshMask32x64(v int32, s uint64) int32 {
 	// arm64:"ASR",-"AND"
+	// loong64:"SRA\t","AND","SGTU","SUBVU","OR"
 	// ppc64x:"ISEL",-"ORN"
 	// riscv64:"SRAW","OR","SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
@@ -197,12 +209,14 @@ func rshMask32x64(v int32, s uint64) int32 {
 }

 func rsh5Mask32x64(v int32, s uint64) int32 {
+	// loong64:"SRA\t",-"AND"
 	// riscv64:"SRAW",-"OR",-"SLTIU"
 	return v >> (s & 31)
 }

 func lshMask64x32(v int64, s uint32) int64 {
 	// arm64:"LSL",-"AND"
+	// loong64:"SLLV",-"AND"
 	// ppc64x:"RLDICL",-"ORN"
 	// riscv64:"SLL",-"AND\t",-"SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
@@ -211,6 +225,7 @@ func lshMask64x32(v int64, s uint32) int64 {

 func rshMask64Ux32(v uint64, s uint32) uint64 {
 	// arm64:"LSR",-"AND",-"CSEL"
+	// loong64:"SRLV",-"AND"
 	// ppc64x:"RLDICL",-"ORN"
 	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"
@@ -219,6 +234,7 @@ func rshMask64Ux32(v uint64, s uint32) uint64 {

 func rshMask64x32(v int64, s uint32) int64 {
 	// arm64:"ASR",-"AND",-"CSEL"
+	// loong64:"SRAV",-"AND"
 	// ppc64x:"RLDICL",-"ORN",-"ISEL"
 	// riscv64:"SRA\t",-"OR",-"SLTIU"
 	// s390x:-"RISBGZ",-"AND",-"LOCGR"