go/test/codegen/rotate.go
Paul E. Murphy (c3c6fbf314)
cmd/compile: combine more 32 bit shift and mask operations on ppc64
Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when mask m and
the shift value produce a constant which can be encoded into an
RLWINM instruction.
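For example (an illustrative sketch, not taken from the CL; the name and
constants are made up), the Go-level shape such a rule targets is:

    // Illustrative only: a 32-bit right shift followed by a contiguous
    // constant mask. Per the description above, the shift and the mask
    // can be selected as a single rotate-and-mask (RLWINM) on
    // ppc64/ppc64le instead of a shift plus a separate AND.
    func shiftAndMask(x uint32) uint32 {
            return (x >> 8) & 0xFF
    }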

Combine (CLRLSLDI (SRWconst x)) if combining the underlying rotate
masks produces a constant which can be encoded into RLWINM.

Likewise for (SLDconst (SRWconst x)) and (CLRLSLDI (RLWINM x)).
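A hedged sketch of the shape behind these rules (whether this exact
source form lowers through SLDconst/CLRLSLDI as described is an
assumption, not something verified here):

    // Illustrative only: a 32-bit right shift whose zero-extended result
    // is shifted left again. The result stays within the low 32 bits
    // (at most 0xFFF0 here), so it is equivalent to rotating the word
    // left by 16 and masking with 0xFFF0, i.e. one rotate-and-mask.
    func shiftExtendShift(x uint32) uint64 {
            return uint64(x>>20) << 4
    }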

Combine rotate word + AND operations which can be encoded as a single
RLWINM/RLWNM instruction.
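This is the case the test updates below exercise; as a standalone
sketch (in the style of the codegen tests, assuming the math/bits
import from the file below):

    // Illustrative only: a word rotate whose result is masked with a
    // contiguous constant; expected to become a single RLWINM (constant
    // rotate) or RLWNM (variable rotate) on ppc64/ppc64le.
    func rotateAndMask(x uint32, r int) uint32 {
            return bits.RotateLeft32(x, r) & 0xFF0000
    }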

The most notable performance improvements arise from the crypto
benchmarks below (GOPPC64=power8 on linux/ppc64le):

name                                            old time/op  new time/op  delta
pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le
ExpandKeyWithSalt                               52.2µs ± 0%    47.5µs ± 0%  -8.88%
ExpandKey                                       44.4µs ± 0%    40.3µs ± 0%  -9.15%

pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le
Key                                             57.6ms ± 0%    52.3ms ± 0%  -9.13%

pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le
Equal                                           90.9ms ± 0%    82.6ms ± 0%  -9.13%
DefaultCost                                     91.0ms ± 0%    82.7ms ± 0%  -9.12%

Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/260798
Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
2020-10-27 18:33:20 +00:00

// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package codegen

import "math/bits"

// ------------------- //
//    const rotates    //
// ------------------- //

func rot64(x uint64) uint64 {
	var a uint64

	// amd64:"ROLQ\t[$]7"
	// arm64:"ROR\t[$]57"
	// s390x:"RLLG\t[$]7"
	// ppc64:"ROTL\t[$]7"
	// ppc64le:"ROTL\t[$]7"
	a += x<<7 | x>>57

	// amd64:"ROLQ\t[$]8"
	// arm64:"ROR\t[$]56"
	// s390x:"RLLG\t[$]8"
	// ppc64:"ROTL\t[$]8"
	// ppc64le:"ROTL\t[$]8"
	a += x<<8 + x>>56

	// amd64:"ROLQ\t[$]9"
	// arm64:"ROR\t[$]55"
	// s390x:"RLLG\t[$]9"
	// ppc64:"ROTL\t[$]9"
	// ppc64le:"ROTL\t[$]9"
	a += x<<9 ^ x>>55

	return a
}

func rot32(x uint32) uint32 {
	var a uint32

	// amd64:"ROLL\t[$]7"
	// arm:"MOVW\tR\\d+@>25"
	// arm64:"RORW\t[$]25"
	// s390x:"RLL\t[$]7"
	// ppc64:"ROTLW\t[$]7"
	// ppc64le:"ROTLW\t[$]7"
	a += x<<7 | x>>25

	// amd64:`ROLL\t[$]8`
	// arm:"MOVW\tR\\d+@>24"
	// arm64:"RORW\t[$]24"
	// s390x:"RLL\t[$]8"
	// ppc64:"ROTLW\t[$]8"
	// ppc64le:"ROTLW\t[$]8"
	a += x<<8 + x>>24

	// amd64:"ROLL\t[$]9"
	// arm:"MOVW\tR\\d+@>23"
	// arm64:"RORW\t[$]23"
	// s390x:"RLL\t[$]9"
	// ppc64:"ROTLW\t[$]9"
	// ppc64le:"ROTLW\t[$]9"
	a += x<<9 ^ x>>23

	return a
}

func rot16(x uint16) uint16 {
	var a uint16

	// amd64:"ROLW\t[$]7"
	a += x<<7 | x>>9

	// amd64:`ROLW\t[$]8`
	a += x<<8 + x>>8

	// amd64:"ROLW\t[$]9"
	a += x<<9 ^ x>>7

	return a
}

func rot8(x uint8) uint8 {
	var a uint8

	// amd64:"ROLB\t[$]5"
	a += x<<5 | x>>3

	// amd64:`ROLB\t[$]6`
	a += x<<6 + x>>2

	// amd64:"ROLB\t[$]7"
	a += x<<7 ^ x>>1

	return a
}

// ----------------------- //
//    non-const rotates    //
// ----------------------- //

func rot64nc(x uint64, z uint) uint64 {
	var a uint64

	z &= 63

	// amd64:"ROLQ"
	// ppc64:"ROTL"
	// ppc64le:"ROTL"
	a += x<<z | x>>(64-z)

	// amd64:"RORQ"
	a += x>>z | x<<(64-z)

	return a
}

func rot32nc(x uint32, z uint) uint32 {
	var a uint32

	z &= 31

	// amd64:"ROLL"
	// ppc64:"ROTLW"
	// ppc64le:"ROTLW"
	a += x<<z | x>>(32-z)

	// amd64:"RORL"
	a += x>>z | x<<(32-z)

	return a
}

func rot16nc(x uint16, z uint) uint16 {
	var a uint16

	z &= 15

	// amd64:"ROLW"
	a += x<<z | x>>(16-z)

	// amd64:"RORW"
	a += x>>z | x<<(16-z)

	return a
}

func rot8nc(x uint8, z uint) uint8 {
	var a uint8

	z &= 7

	// amd64:"ROLB"
	a += x<<z | x>>(8-z)

	// amd64:"RORB"
	a += x>>z | x<<(8-z)

	return a
}

// Issue 18254: rotate after inlining
func f32(x uint32) uint32 {
	// amd64:"ROLL\t[$]7"
	return rot32nc(x, 7)
}

// --------------------------------------- //
//   Combined Rotate + Masking operations  //
// --------------------------------------- //

func checkMaskedRotate32(a []uint32, r int) {
	i := 0

	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
	i++
	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
	i++
	// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+"
	// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
	i++
	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+"
	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
	i++
	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+"
	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
	i++
	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+"
	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+"
	a[i] = bits.RotateLeft32(a[3], r) & 0xFF00
	i++
	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+"
	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+"
	a[i] = bits.RotateLeft32(a[3], r) & 0xFFF00FFF
	i++
	// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+"
	// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+"
	a[i] = bits.RotateLeft32(a[3], 4) & 0xFFF00FFF
	i++
}