mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
goos: linux goarch: loong64 pkg: math/bits cpu: Loongson-3A6000-HV @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | LeadingZeros 1.100n ± 1% 1.101n ± 0% ~ (p=0.566 n=10) LeadingZeros8 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10) LeadingZeros16 1.501n ± 0% 1.502n ± 0% +0.07% (p=0.000 n=10) LeadingZeros32 1.2010n ± 0% 0.9511n ± 0% -20.81% (p=0.000 n=10) LeadingZeros64 1.104n ± 1% 1.119n ± 0% +1.40% (p=0.000 n=10) TrailingZeros 0.8137n ± 0% 0.8086n ± 0% -0.63% (p=0.001 n=10) TrailingZeros8 1.031n ± 1% 1.031n ± 1% ~ (p=0.956 n=10) TrailingZeros16 0.8204n ± 1% 0.8114n ± 0% -1.11% (p=0.000 n=10) TrailingZeros32 0.8145n ± 0% 0.8090n ± 0% -0.68% (p=0.000 n=10) TrailingZeros64 0.8159n ± 0% 0.8089n ± 1% -0.86% (p=0.000 n=10) OnesCount 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10) OnesCount8 0.8005n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10) OnesCount16 0.9339n ± 0% 0.9344n ± 0% +0.05% (p=0.000 n=10) OnesCount32 0.8672n ± 0% 0.8677n ± 0% +0.06% (p=0.000 n=10) OnesCount64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10) RotateLeft 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10) RotateLeft8 1.202n ± 0% 1.202n ± 0% ~ (p=0.210 n=10) RotateLeft16 0.8050n ± 0% 0.8036n ± 0% -0.17% (p=0.002 n=10) RotateLeft32 0.6674n ± 0% 0.6674n ± 0% ~ (p=1.000 n=10) RotateLeft64 0.6673n ± 0% 0.6674n ± 0% ~ (p=0.072 n=10) Reverse 0.4123n ± 0% 0.4067n ± 1% -1.37% (p=0.000 n=10) Reverse8 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10) Reverse16 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10) Reverse32 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.000 n=10) Reverse64 0.8004n ± 0% 0.8009n ± 0% +0.06% (p=0.001 n=10) ReverseBytes 0.4100n ± 1% 0.4057n ± 1% -1.06% (p=0.002 n=10) ReverseBytes16 0.8004n ± 0% 0.8009n ± 0% +0.07% (p=0.000 n=10) ReverseBytes32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10) ReverseBytes64 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10) Add 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10) Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10) Add64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10) 
Add64multiple 1.831n ± 0% 1.832n ± 0% ~ (p=1.000 n=10) Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10) Sub32 1.601n ± 0% 1.602n ± 0% +0.06% (p=0.000 n=10) Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=0.474 n=10) Sub64multiple 2.400n ± 0% 2.402n ± 0% +0.10% (p=0.000 n=10) Mul 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10) Mul32 0.8005n ± 0% 0.8009n ± 0% +0.05% (p=0.000 n=10) Mul64 0.8004n ± 0% 0.8008n ± 0% +0.05% (p=0.000 n=10) Div 9.107n ± 0% 9.083n ± 0% ~ (p=0.255 n=10) Div32 4.009n ± 0% 4.011n ± 0% +0.05% (p=0.000 n=10) Div64 9.705n ± 0% 9.711n ± 0% +0.06% (p=0.000 n=10) geomean 1.089n 1.083n -0.62% goos: linux goarch: loong64 pkg: math/bits cpu: Loongson-3A5000 @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | LeadingZeros 1.352n ± 0% 1.341n ± 4% -0.81% (p=0.024 n=10) LeadingZeros8 1.766n ± 0% 1.781n ± 0% +0.88% (p=0.000 n=10) LeadingZeros16 1.766n ± 0% 1.782n ± 0% +0.88% (p=0.000 n=10) LeadingZeros32 1.536n ± 0% 1.341n ± 1% -12.73% (p=0.000 n=10) LeadingZeros64 1.351n ± 1% 1.338n ± 0% -0.96% (p=0.000 n=10) TrailingZeros 0.9037n ± 0% 0.9025n ± 0% -0.12% (p=0.020 n=10) TrailingZeros8 1.087n ± 3% 1.056n ± 0% ~ (p=0.060 n=10) TrailingZeros16 1.101n ± 0% 1.101n ± 0% ~ (p=0.211 n=10) TrailingZeros32 0.9040n ± 0% 0.9024n ± 1% -0.18% (p=0.017 n=10) TrailingZeros64 0.9043n ± 0% 0.9028n ± 1% ~ (p=0.118 n=10) OnesCount 1.503n ± 2% 1.482n ± 1% -1.43% (p=0.001 n=10) OnesCount8 1.207n ± 0% 1.206n ± 0% -0.12% (p=0.000 n=10) OnesCount16 1.501n ± 0% 1.534n ± 0% +2.13% (p=0.000 n=10) OnesCount32 1.483n ± 1% 1.531n ± 1% +3.27% (p=0.000 n=10) OnesCount64 1.301n ± 0% 1.302n ± 0% +0.08% (p=0.000 n=10) RotateLeft 0.8136n ± 4% 0.8083n ± 0% -0.66% (p=0.002 n=10) RotateLeft8 1.311n ± 0% 1.310n ± 0% ~ (p=0.786 n=10) RotateLeft16 1.165n ± 0% 1.149n ± 0% -1.33% (p=0.001 n=10) RotateLeft32 0.8138n ± 1% 0.8093n ± 0% -0.57% (p=0.017 n=10) RotateLeft64 0.8149n ± 1% 0.8088n ± 0% -0.74% (p=0.000 n=10) Reverse 0.5195n ± 1% 0.5109n ± 0% -1.67% (p=0.000 n=10) Reverse8 0.8007n ± 0% 0.8010n ± 0% 
+0.04% (p=0.000 n=10) Reverse16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10) Reverse32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.012 n=10) Reverse64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.010 n=10) ReverseBytes 0.5120n ± 1% 0.5122n ± 2% ~ (p=0.306 n=10) ReverseBytes16 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10) ReverseBytes32 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10) ReverseBytes64 0.8007n ± 0% 0.8010n ± 0% +0.04% (p=0.000 n=10) Add 1.201n ± 0% 1.201n ± 4% ~ (p=0.334 n=10) Add32 1.201n ± 0% 1.201n ± 0% ~ (p=0.563 n=10) Add64 1.201n ± 0% 1.201n ± 1% ~ (p=0.652 n=10) Add64multiple 1.909n ± 0% 1.902n ± 0% ~ (p=0.126 n=10) Sub 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10) Sub32 1.655n ± 0% 1.654n ± 0% ~ (p=0.589 n=10) Sub64 1.201n ± 0% 1.201n ± 0% ~ (p=1.000 n=10) Sub64multiple 2.150n ± 0% 2.180n ± 4% +1.37% (p=0.000 n=10) Mul 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.011 n=10) Mul32 1.053n ± 0% 1.030n ± 0% -2.23% (p=0.000 n=10) Mul64 0.9341n ± 0% 0.9345n ± 0% +0.04% (p=0.018 n=10) Div 11.59n ± 0% 11.57n ± 1% ~ (p=0.091 n=10) Div32 4.337n ± 0% 4.337n ± 1% ~ (p=0.783 n=10) Div64 12.81n ± 0% 12.76n ± 0% -0.39% (p=0.001 n=10) geomean 1.257n 1.252n -0.46% Change-Id: I9e93ea49736760c19dc6b6463d2aa95878121b7b Reviewed-on: https://go-review.googlesource.com/c/go/+/627855 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: abner chenc <chenguoqi@loongson.cn> Reviewed-by: Meidan Li <limeidan@loongson.cn> Reviewed-by: Junyang Shao <shaojunyang@google.com>
579 lines
14 KiB
Go
579 lines
14 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// ------------------ //
|
|
// constant shifts //
|
|
// ------------------ //
|
|
|
|
// lshConst64x64 returns v shifted left by the constant 33, with the shift
// count written as a uint64 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshConst64x64(v int64) int64 {
	// loong64:"SLLV"
	// ppc64x:"SLD"
	// riscv64:"SLLI",-"AND",-"SLTIU"
	return v << uint64(33)
}
|
|
|
|
// rshConst64Ux64 returns v logically shifted right by the constant 33, with
// the shift count written as a uint64 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64Ux64(v uint64) uint64 {
	// loong64:"SRLV"
	// ppc64x:"SRD"
	// riscv64:"SRLI\t",-"AND",-"SLTIU"
	return v >> uint64(33)
}
|
|
|
|
// rshConst64Ux64Overflow32 zero-extends the 32-bit v to 64 bits and shifts it
// right by 32, so every source bit is shifted out and the result is 0.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64Ux64Overflow32(v uint32) uint64 {
	// loong64:"MOVV\t\\$0,",-"SRL\t"
	// riscv64:"MOV\t\\$0,",-"SRL"
	return uint64(v) >> 32
}
|
|
|
|
// rshConst64Ux64Overflow16 zero-extends the 16-bit v to 64 bits and shifts it
// right by 16, so every source bit is shifted out and the result is 0.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64Ux64Overflow16(v uint16) uint64 {
	// loong64:"MOVV\t\\$0,",-"SRLV"
	// riscv64:"MOV\t\\$0,",-"SRL"
	return uint64(v) >> 16
}
|
|
|
|
// rshConst64Ux64Overflow8 zero-extends the 8-bit v to 64 bits and shifts it
// right by 8, so every source bit is shifted out and the result is 0.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64Ux64Overflow8(v uint8) uint64 {
	// loong64:"MOVV\t\\$0,",-"SRLV"
	// riscv64:"MOV\t\\$0,",-"SRL"
	return uint64(v) >> 8
}
|
|
|
|
// rshConst64x64 returns v arithmetically shifted right by the constant 33,
// with the shift count written as a uint64 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64x64(v int64) int64 {
	// loong64:"SRAV"
	// ppc64x:"SRAD"
	// riscv64:"SRAI\t",-"OR",-"SLTIU"
	return v >> uint64(33)
}
|
|
|
|
// rshConst64x64Overflow32 sign-extends the 32-bit v to 64 bits and shifts it
// right by 32, leaving only copies of the sign bit: -1 for negative v and 0
// otherwise.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64x64Overflow32(v int32) int64 {
	// loong64:"SRA\t\\$31"
	// riscv64:"SRAIW",-"SLLI",-"SRAI\t"
	return int64(v) >> 32
}
|
|
|
|
// rshConst64x64Overflow16 sign-extends the 16-bit v to 64 bits and shifts it
// right by 16, leaving only copies of the sign bit: -1 for negative v and 0
// otherwise.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64x64Overflow16(v int16) int64 {
	// loong64:"SLLV\t\\$48","SRAV\t\\$63"
	// riscv64:"SLLI","SRAI",-"SRAIW"
	return int64(v) >> 16
}
|
|
|
|
// rshConst64x64Overflow8 sign-extends the 8-bit v to 64 bits and shifts it
// right by 8, leaving only copies of the sign bit: -1 for negative v and 0
// otherwise.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64x64Overflow8(v int8) int64 {
	// loong64:"SLLV\t\\$56","SRAV\t\\$63"
	// riscv64:"SLLI","SRAI",-"SRAIW"
	return int64(v) >> 8
}
|
|
|
|
// lshConst32x1 returns the 32-bit v shifted left by one bit.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func lshConst32x1(v int32) int32 {
	// amd64:"ADDL", -"SHLL"
	return v << 1
}
|
|
|
|
// lshConst64x1 returns the 64-bit v shifted left by one bit.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func lshConst64x1(v int64) int64 {
	// amd64:"ADDQ", -"SHLQ"
	return v << 1
}
|
|
|
|
// lshConst32x64 returns the 32-bit v shifted left by the constant 29, with
// the shift count written as a uint64 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshConst32x64(v int32) int32 {
	// loong64:"SLL\t"
	// ppc64x:"SLW"
	// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
	return v << uint64(29)
}
|
|
|
|
// rshConst32Ux64 returns the 32-bit v logically shifted right by the constant
// 29, with the shift count written as a uint64 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst32Ux64(v uint32) uint32 {
	// loong64:"SRL\t"
	// ppc64x:"SRW"
	// riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW"
	return v >> uint64(29)
}
|
|
|
|
// rshConst32x64 returns the 32-bit v arithmetically shifted right by the
// constant 29, with the shift count written as a uint64 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst32x64(v int32) int32 {
	// loong64:"SRA\t"
	// ppc64x:"SRAW"
	// riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW"
	return v >> uint64(29)
}
|
|
|
|
// lshConst64x32 returns v shifted left by the constant 33, with the shift
// count written as a uint32 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshConst64x32(v int64) int64 {
	// loong64:"SLLV"
	// ppc64x:"SLD"
	// riscv64:"SLLI",-"AND",-"SLTIU"
	return v << uint32(33)
}
|
|
|
|
// rshConst64Ux32 returns v logically shifted right by the constant 33, with
// the shift count written as a uint32 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64Ux32(v uint64) uint64 {
	// loong64:"SRLV"
	// ppc64x:"SRD"
	// riscv64:"SRLI\t",-"AND",-"SLTIU"
	return v >> uint32(33)
}
|
|
|
|
// rshConst64x32 returns v arithmetically shifted right by the constant 33,
// with the shift count written as a uint32 conversion.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshConst64x32(v int64) int64 {
	// loong64:"SRAV"
	// ppc64x:"SRAD"
	// riscv64:"SRAI\t",-"OR",-"SLTIU"
	return v >> uint32(33)
}
|
|
|
|
// lshConst32x1Add doubles the 32-bit x by adding it to itself and then shifts
// the sum left by one bit, which equals x shifted left by two.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func lshConst32x1Add(x int32) int32 {
	// amd64:"SHLL\t[$]2"
	return (x + x) << 1
}
|
|
|
|
// lshConst64x1Add doubles the 64-bit x by adding it to itself and then shifts
// the sum left by one bit, which equals x shifted left by two.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func lshConst64x1Add(x int64) int64 {
	// amd64:"SHLQ\t[$]2"
	return (x + x) << 1
}
|
|
|
|
// lshConst32x2Add doubles the 32-bit x by adding it to itself and then shifts
// the sum left by two bits, which equals x shifted left by three.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func lshConst32x2Add(x int32) int32 {
	// amd64:"SHLL\t[$]3"
	return (x + x) << 2
}
|
|
|
|
// lshConst64x2Add doubles the 64-bit x by adding it to itself and then shifts
// the sum left by two bits, which equals x shifted left by three.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func lshConst64x2Add(x int64) int64 {
	// amd64:"SHLQ\t[$]3"
	return (x + x) << 2
}
|
|
|
|
// ------------------ //
|
|
// masked shifts //
|
|
// ------------------ //
|
|
|
|
// lshMask64x64 shifts v left by s masked to its low six bits, so the
// effective count is always in the range 0 through 63.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshMask64x64(v int64, s uint64) int64 {
	// arm64:"LSL",-"AND"
	// ppc64x:"RLDICL",-"ORN",-"ISEL"
	// riscv64:"SLL",-"AND\t",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << (s & 63)
}
|
|
|
|
// rshMask64Ux64 logically shifts v right by s masked to its low six bits, so
// the effective count is always in the range 0 through 63.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask64Ux64(v uint64, s uint64) uint64 {
	// arm64:"LSR",-"AND",-"CSEL"
	// ppc64x:"RLDICL",-"ORN",-"ISEL"
	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
// rshMask64x64 arithmetically shifts v right by s masked to its low six bits,
// so the effective count is always in the range 0 through 63.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask64x64(v int64, s uint64) int64 {
	// arm64:"ASR",-"AND",-"CSEL"
	// ppc64x:"RLDICL",-"ORN",-"ISEL"
	// riscv64:"SRA\t",-"OR",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
// lshMask32x64 shifts the 32-bit v left by s masked to its low six bits. The
// masked count can still reach 63, which exceeds the operand width, so counts
// of 32 or more yield 0.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshMask32x64(v int32, s uint64) int32 {
	// arm64:"LSL",-"AND"
	// ppc64x:"ISEL",-"ORN"
	// riscv64:"SLL",-"AND\t",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << (s & 63)
}
|
|
|
|
// rshMask32Ux64 logically shifts the 32-bit v right by s masked to its low
// six bits. The masked count can still reach 63, which exceeds the operand
// width, so counts of 32 or more yield 0.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask32Ux64(v uint32, s uint64) uint32 {
	// arm64:"LSR",-"AND"
	// ppc64x:"ISEL",-"ORN"
	// riscv64:"SRLW","SLTIU","NEG","AND\t",-"SRL\t"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
// rsh5Mask32Ux64 logically shifts the 32-bit v right by s masked to its low
// five bits, so the effective count is always in the range 0 through 31.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func rsh5Mask32Ux64(v uint32, s uint64) uint32 {
	// riscv64:"SRLW",-"AND\t",-"SLTIU",-"SRL\t"
	return v >> (s & 31)
}
|
|
|
|
// rshMask32x64 arithmetically shifts the 32-bit v right by s masked to its
// low six bits. The masked count can still reach 63, which exceeds the
// operand width, so counts of 32 or more leave only copies of the sign bit.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask32x64(v int32, s uint64) int32 {
	// arm64:"ASR",-"AND"
	// ppc64x:"ISEL",-"ORN"
	// riscv64:"SRAW","OR","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
// rsh5Mask32x64 arithmetically shifts the 32-bit v right by s masked to its
// low five bits, so the effective count is always in the range 0 through 31.
//
// The architecture-prefixed comment in the body is an asmcheck directive for
// the statement directly below it; keep it adjacent to that statement.
func rsh5Mask32x64(v int32, s uint64) int32 {
	// riscv64:"SRAW",-"OR",-"SLTIU"
	return v >> (s & 31)
}
|
|
|
|
// lshMask64x32 shifts v left by the 32-bit count s masked to its low six
// bits, so the effective count is always in the range 0 through 63.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshMask64x32(v int64, s uint32) int64 {
	// arm64:"LSL",-"AND"
	// ppc64x:"RLDICL",-"ORN"
	// riscv64:"SLL",-"AND\t",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << (s & 63)
}
|
|
|
|
// rshMask64Ux32 logically shifts v right by the 32-bit count s masked to its
// low six bits, so the effective count is always in the range 0 through 63.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask64Ux32(v uint64, s uint32) uint64 {
	// arm64:"LSR",-"AND",-"CSEL"
	// ppc64x:"RLDICL",-"ORN"
	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
// rshMask64x32 arithmetically shifts v right by the 32-bit count s masked to
// its low six bits, so the effective count is always in the range 0 through
// 63.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask64x32(v int64, s uint32) int64 {
	// arm64:"ASR",-"AND",-"CSEL"
	// ppc64x:"RLDICL",-"ORN",-"ISEL"
	// riscv64:"SRA\t",-"OR",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
// lshMask64x32Ext shifts v left by a signed 32-bit count that is first masked
// to its low six bits and then converted to uint, so the effective count is
// in the range 0 through 63 even when s is negative.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshMask64x32Ext(v int64, s int32) int64 {
	// ppc64x:"RLDICL",-"ORN",-"ISEL"
	// riscv64:"SLL",-"AND\t",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << uint(s&63)
}
|
|
|
|
// rshMask64Ux32Ext logically shifts v right by a signed 32-bit count that is
// first masked to its low six bits and then converted to uint, so the
// effective count is in the range 0 through 63 even when s is negative.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
	// ppc64x:"RLDICL",-"ORN",-"ISEL"
	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> uint(s&63)
}
|
|
|
|
// rshMask64x32Ext arithmetically shifts v right by a signed 32-bit count that
// is first masked to its low six bits and then converted to uint, so the
// effective count is in the range 0 through 63 even when s is negative.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshMask64x32Ext(v int64, s int32) int64 {
	// ppc64x:"RLDICL",-"ORN",-"ISEL"
	// riscv64:"SRA\t",-"OR",-"SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> uint(s&63)
}
|
|
|
|
// --------------- //
|
|
// signed shifts //
|
|
// --------------- //
|
|
|
|
// lshSigned shifts x left by a signed count of each integer width and
// discards the results. A negative count makes the corresponding shift panic
// at run time.
//
// We do want to generate a test + panicshift for these cases.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
	// amd64:"TESTB"
	_ = x << v8
	// amd64:"TESTW"
	_ = x << v16
	// amd64:"TESTL"
	_ = x << v32
	// amd64:"TESTQ"
	_ = x << v64
}
|
|
|
|
// lshSignedMasked shifts x left by signed counts that are first masked with a
// non-negative constant, and discards the results. The masked count can never
// be negative, so none of the shifts can panic.
//
// We want to avoid generating a test + panicshift for these cases.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
	// amd64:-"TESTB"
	_ = x << (v8 & 7)
	// amd64:-"TESTW"
	_ = x << (v16 & 15)
	// amd64:-"TESTL"
	_ = x << (v32 & 31)
	// amd64:-"TESTQ"
	_ = x << (v64 & 63)
}
|
|
|
|
// ------------------ //
|
|
// bounded shifts //
|
|
// ------------------ //
|
|
|
|
// lshGuarded64 returns v shifted left by s when s is less than 64 and panics
// with the message shift too large otherwise.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func lshGuarded64(v int64, s uint) int64 {
	if s < 64 {
		// riscv64:"SLL",-"AND",-"SLTIU"
		// s390x:-"RISBGZ",-"AND",-"LOCGR"
		// wasm:-"Select",-".*LtU"
		// arm64:"LSL",-"CSEL"
		return v << s
	}
	panic("shift too large")
}
|
|
|
|
// rshGuarded64U returns v logically shifted right by s when s is less than 64
// and panics with the message shift too large otherwise.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshGuarded64U(v uint64, s uint) uint64 {
	if s < 64 {
		// riscv64:"SRL\t",-"AND",-"SLTIU"
		// s390x:-"RISBGZ",-"AND",-"LOCGR"
		// wasm:-"Select",-".*LtU"
		// arm64:"LSR",-"CSEL"
		return v >> s
	}
	panic("shift too large")
}
|
|
|
|
// rshGuarded64 returns v arithmetically shifted right by s when s is less
// than 64 and panics with the message shift too large otherwise.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func rshGuarded64(v int64, s uint) int64 {
	if s < 64 {
		// riscv64:"SRA\t",-"OR",-"SLTIU"
		// s390x:-"RISBGZ",-"AND",-"LOCGR"
		// wasm:-"Select",-".*LtU"
		// arm64:"ASR",-"CSEL"
		return v >> s
	}
	panic("shift too large")
}
|
|
|
|
// provedUnsignedShiftLeft shifts each unsigned operand left by the signed
// count shift, but only when shift is non-negative and smaller than that
// operand's bit width. A result whose guard fails keeps its zero value.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func provedUnsignedShiftLeft(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
	if shift >= 0 && shift < 64 {
		// arm64:"LSL",-"CSEL"
		r1 = val64 << shift
	}
	if shift >= 0 && shift < 32 {
		// arm64:"LSL",-"CSEL"
		r2 = val32 << shift
	}
	if shift >= 0 && shift < 16 {
		// arm64:"LSL",-"CSEL"
		r3 = val16 << shift
	}
	if shift >= 0 && shift < 8 {
		// arm64:"LSL",-"CSEL"
		r4 = val8 << shift
	}
	return r1, r2, r3, r4
}
|
|
|
|
// provedSignedShiftLeft shifts each signed operand left by the signed count
// shift, but only when shift is non-negative and smaller than that operand's
// bit width. A result whose guard fails keeps its zero value.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func provedSignedShiftLeft(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
	if shift >= 0 && shift < 64 {
		// arm64:"LSL",-"CSEL"
		r1 = val64 << shift
	}
	if shift >= 0 && shift < 32 {
		// arm64:"LSL",-"CSEL"
		r2 = val32 << shift
	}
	if shift >= 0 && shift < 16 {
		// arm64:"LSL",-"CSEL"
		r3 = val16 << shift
	}
	if shift >= 0 && shift < 8 {
		// arm64:"LSL",-"CSEL"
		r4 = val8 << shift
	}
	return r1, r2, r3, r4
}
|
|
|
|
// provedUnsignedShiftRight logically shifts each unsigned operand right by
// the signed count shift, but only when shift is non-negative and smaller
// than that operand's bit width. A result whose guard fails keeps its zero
// value.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func provedUnsignedShiftRight(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
	if shift >= 0 && shift < 64 {
		// arm64:"LSR",-"CSEL"
		r1 = val64 >> shift
	}
	if shift >= 0 && shift < 32 {
		// arm64:"LSR",-"CSEL"
		r2 = val32 >> shift
	}
	if shift >= 0 && shift < 16 {
		// arm64:"LSR",-"CSEL"
		r3 = val16 >> shift
	}
	if shift >= 0 && shift < 8 {
		// arm64:"LSR",-"CSEL"
		r4 = val8 >> shift
	}
	return r1, r2, r3, r4
}
|
|
|
|
// provedSignedShiftRight arithmetically shifts each signed operand right by
// the signed count shift, but only when shift is non-negative and smaller
// than that operand's bit width. A result whose guard fails keeps its zero
// value.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func provedSignedShiftRight(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
	if shift >= 0 && shift < 64 {
		// arm64:"ASR",-"CSEL"
		r1 = val64 >> shift
	}
	if shift >= 0 && shift < 32 {
		// arm64:"ASR",-"CSEL"
		r2 = val32 >> shift
	}
	if shift >= 0 && shift < 16 {
		// arm64:"ASR",-"CSEL"
		r3 = val16 >> shift
	}
	if shift >= 0 && shift < 8 {
		// arm64:"ASR",-"CSEL"
		r4 = val8 >> shift
	}
	return r1, r2, r3, r4
}
|
|
|
|
// checkUnneededTrunc sums entries of tab selected by indexes built from
// truncations of v combined with b or h, adds twice the low 16 bits of d, and
// returns that sum together with twice the low 32 bits of d.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {

	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f := tab[byte(v)^b]
	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[byte(v)&b]
	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[byte(v)|b]
	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)&h]
	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)^h]
	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)|h]
	// ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
	f += tab[v&0xff]
	// ppc64x:-".*AND",".*CLRLSLWI"
	f += 2 * uint32(uint16(d))
	// ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
	g := 2 * uint64(uint32(d))
	return f, g
}
|
|
|
|
// checkCombinedShifts evaluates several mask-then-shift-left expressions on
// operands of different widths, plus a widening multiply by 8, and returns
// the accumulated result for each width.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) {

	// ppc64x:-"AND","CLRLSLWI"
	f := (v8 & 0xF) << 2
	// ppc64x:"CLRLSLWI"
	f += byte(v16) << 3
	// ppc64x:-"AND","CLRLSLWI"
	g := (v16 & 0xFF) << 3
	// ppc64x:-"AND","CLRLSLWI"
	h := (v32 & 0xFFFFF) << 2
	// ppc64x:"CLRLSLDI"
	i := (v64 & 0xFFFFFFFF) << 5
	// ppc64x:-"CLRLSLDI"
	i += (v64 & 0xFFFFFFF) << 38
	// ppc64x/power9:-"CLRLSLDI"
	i += (v64 & 0xFFFF00) << 10
	// ppc64x/power9:-"SLD","EXTSWSLI"
	j := int64(x32+32) * 8
	return f, g, h, i, j
}
|
|
|
|
// checkWidenAfterShift narrows right-shifted values of v (and of the 32-bit
// intermediate f) to int32, int16 and int8, accumulates them per width, and
// returns the int8 sum widened to int64 and the int16 sum converted to
// uint64. The parameter u is not used.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {

	// ppc64x:-".*MOVW"
	f := int32(v >> 32)
	// ppc64x:".*MOVW"
	f += int32(v >> 31)
	// ppc64x:-".*MOVH"
	g := int16(v >> 48)
	// ppc64x:".*MOVH"
	g += int16(v >> 30)
	// ppc64x:-".*MOVH"
	g += int16(f >> 16)
	// ppc64x:-".*MOVB"
	h := int8(v >> 56)
	// ppc64x:".*MOVB"
	h += int8(v >> 28)
	// ppc64x:-".*MOVB"
	h += int8(f >> 24)
	// ppc64x:".*MOVB"
	h += int8(f >> 16)
	return int64(h), uint64(g)
}
|
|
|
|
// checkShiftAndMask32 rewrites the first eight elements of v in place, each
// with a different combination of a constant mask and a constant right shift.
// It panics with an index error if v has fewer than eight elements.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkShiftAndMask32(v []uint32) {
	i := 0

	// ppc64x: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
	v[i] = (v[i] & 0xFF00000) >> 8
	i++
	// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
	v[i] = (v[i] & 0xFF00) >> 6
	i++
	// ppc64x: "MOVW\tR0"
	v[i] = (v[i] & 0xFF) >> 8
	i++
	// ppc64x: "MOVW\tR0"
	v[i] = (v[i] & 0xF000000) >> 28
	i++
	// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
	v[i] = (v[i] >> 6) & 0xFF
	i++
	// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
	v[i] = (v[i] >> 6) & 0xFF000
	i++
	// ppc64x: "MOVW\tR0"
	v[i] = (v[i] >> 20) & 0xFF000
	i++
	// ppc64x: "MOVW\tR0"
	v[i] = (v[i] >> 24) & 0xFF00
	i++
}
|
|
|
|
// checkMergedShifts32 copies elements within its array arguments using
// indexes derived from right shifts of v. The arrays are passed by value, so
// the caller's arrays are not modified. The parameter u is not used.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) {
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
	a[0] = a[uint8(v>>24)]
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
	b[0] = b[uint8(v>>24)]
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
	b[1] = b[(v>>20)&0xFF]
	// ppc64x: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
	b[2] = b[v>>25]
}
|
|
|
|
// checkMergedShifts64 copies elements within its array arguments using
// indexes derived from shifts and masks of v. The arrays are passed by value,
// so the caller's arrays are not modified.
//
// Several index expressions can exceed 255 for some values of v (for example
// the two indexes into c), in which case the access panics with an index
// error.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkMergedShifts64(a [256]uint32, b [256]uint64, c [256]byte, v uint64) {
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
	a[0] = a[uint8(v>>24)]
	// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
	a[1] = a[uint8(v>>25)]
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]9, R[0-9]+, [$]23, [$]29, R[0-9]+"
	a[2] = a[v>>25&0x7F]
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]3, R[0-9]+, [$]29, [$]29, R[0-9]+"
	a[3] = a[(v>>31)&0x01]
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]12, R[0-9]+, [$]21, [$]28, R[0-9]+"
	b[0] = b[uint8(v>>23)]
	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
	b[1] = b[(v>>20)&0xFF]
	// ppc64x: "RLWNM", -"SLD"
	b[2] = b[((uint64((uint32(v) >> 21)) & 0x3f) << 4)]
	// ppc64x: "RLWNM\t[$]11, R[0-9]+, [$]10, [$]15"
	c[0] = c[((v>>5)&0x3F)<<16]
	// ppc64x: "ANDCC\t[$]8064,"
	c[1] = c[((v>>7)&0x3F)<<7]
}
|
|
|
|
// checkShiftMask stores shift-and-mask results computed from a and b into
// z[0] through z[2] and y[0] through y[3]. Both slices are first indexed at
// 128, so the function panics with an index error unless each holds at least
// 129 elements.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkShiftMask(a uint32, b uint64, z []uint32, y []uint64) {
	_ = y[128]
	_ = z[128]
	// ppc64x: -"MOVBZ", -"SRW", "RLWNM"
	z[0] = uint32(uint8(a >> 5))
	// ppc64x: -"MOVBZ", -"SRW", "RLWNM"
	z[1] = uint32(uint8((a >> 4) & 0x7e))
	// ppc64x: "RLWNM\t[$]25, R[0-9]+, [$]27, [$]29, R[0-9]+"
	z[2] = uint32(uint8(a>>7)) & 0x1c
	// ppc64x: -"MOVWZ"
	y[0] = uint64((a >> 6) & 0x1c)
	// ppc64x: -"MOVWZ"
	y[1] = uint64(uint32(b)<<6) + 1
	// ppc64x: -"MOVHZ", -"MOVWZ"
	y[2] = uint64((uint16(a) >> 9) & 0x1F)
	// ppc64x: -"MOVHZ", -"MOVWZ", -"ANDCC"
	y[3] = uint64(((uint16(a) & 0xFF0) >> 9) & 0x1F)
}
|
|
|
|
// 128 bit shifts
|
|
|
|
// check128bitShifts combines x and y with a pair of complementary shifts. With
// s = bits&63 and ŝ = (64-bits)&63 it returns x>>s | y<<ŝ followed by
// x<<s | y>>ŝ.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) {
	s := bits & 63
	ŝ := (64 - bits) & 63
	// check that the shift operation has two commas (three operands)
	// amd64:"SHRQ.*,.*,"
	shr := x>>s | y<<ŝ
	// amd64:"SHLQ.*,.*,"
	shl := x<<s | y>>ŝ
	return shr, shl
}
|
|
|
|
// checkShiftToMask clears the low five bits of u[0] and s[0] with a
// shift-right-then-left pair, and clears the high five bits of u[1] with a
// shift-left-then-right pair, all in place. It panics with an index error if
// u has fewer than two elements or s is empty.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkShiftToMask(u []uint64, s []int64) {
	// amd64:-"SHR",-"SHL","ANDQ"
	u[0] = u[0] >> 5 << 5
	// amd64:-"SAR",-"SHL","ANDQ"
	s[0] = s[0] >> 5 << 5
	// amd64:-"SHR",-"SHL","ANDQ"
	u[1] = u[1] << 5 >> 5
}
|
|
|
|
//
|
|
// Left shift with addition.
|
|
//
|
|
|
|
// checkLeftShiftWithAddition adds b shifted left by 1, 2 and 3 to a in turn
// and returns the total, which equals a + 14*b.
//
// The architecture-prefixed comments in the body are asmcheck directives for
// the statement directly below them; keep them adjacent to that statement.
func checkLeftShiftWithAddition(a int64, b int64) int64 {
	// riscv64/rva20u64: "SLLI","ADD"
	// riscv64/rva22u64: "SH1ADD"
	a = a + b<<1
	// riscv64/rva20u64: "SLLI","ADD"
	// riscv64/rva22u64: "SH2ADD"
	a = a + b<<2
	// riscv64/rva20u64: "SLLI","ADD"
	// riscv64/rva22u64: "SH3ADD"
	a = a + b<<3
	return a
}
|