cmd/compile: fix memcombine pass for big endian, > 1 byte elements

The shift amounts were wrong in this case, leading to miscompilation
of load combining.

Also the store combining was not triggering when it should.

Fixes #64468

Change-Id: Iaeb08972c5fc1d6f628800334789c6af7216e87b
Reviewed-on: https://go-review.googlesource.com/c/go/+/546355
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Keith Randall 2023-11-30 10:04:16 -08:00 committed by Keith Randall
parent 76d90a34dd
commit bda1ef13f8
3 changed files with 169 additions and 6 deletions

View File

@ -313,8 +313,8 @@ func combineLoads(root *Value, n int64) bool {
if isLittleEndian && shift0 != 0 {
v = leftShift(loadBlock, pos, v, shift0)
}
if isBigEndian && shift0-(n-1)*8 != 0 {
v = leftShift(loadBlock, pos, v, shift0-(n-1)*8)
if isBigEndian && shift0-(n-1)*size*8 != 0 {
v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8)
}
// Install with (Copy v).
@ -662,14 +662,14 @@ func combineStores(root *Value, n int64) bool {
isLittleEndian := true
shift0 := shift(a[0].store, shiftBase)
for i := int64(1); i < n; i++ {
if shift(a[i].store, shiftBase) != shift0+i*8 {
if shift(a[i].store, shiftBase) != shift0+i*size*8 {
isLittleEndian = false
break
}
}
isBigEndian := true
for i := int64(1); i < n; i++ {
if shift(a[i].store, shiftBase) != shift0-i*8 {
if shift(a[i].store, shiftBase) != shift0-i*size*8 {
isBigEndian = false
break
}
@ -692,8 +692,8 @@ func combineStores(root *Value, n int64) bool {
if isLittleEndian && shift0 != 0 {
sv = rightShift(root.Block, root.Pos, sv, shift0)
}
if isBigEndian && shift0-(n-1)*8 != 0 {
sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*8)
if isBigEndian && shift0-(n-1)*size*8 != 0 {
sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*size*8)
}
if sv.Type.Size() > size*n {
sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), size*n)

View File

@ -71,3 +71,129 @@ func readUint32be(b []byte) uint64 {
//go:noinline
func nop() {
}
type T32 struct {
a, b uint32
}
//go:noinline
func (t *T32) bigEndianLoad() uint64 {
return uint64(t.a)<<32 | uint64(t.b)
}
//go:noinline
func (t *T32) littleEndianLoad() uint64 {
return uint64(t.a) | (uint64(t.b) << 32)
}
//go:noinline
func (t *T32) bigEndianStore(x uint64) {
t.a = uint32(x >> 32)
t.b = uint32(x)
}
//go:noinline
func (t *T32) littleEndianStore(x uint64) {
t.a = uint32(x)
t.b = uint32(x >> 32)
}
type T16 struct {
a, b uint16
}
//go:noinline
func (t *T16) bigEndianLoad() uint32 {
return uint32(t.a)<<16 | uint32(t.b)
}
//go:noinline
func (t *T16) littleEndianLoad() uint32 {
return uint32(t.a) | (uint32(t.b) << 16)
}
//go:noinline
func (t *T16) bigEndianStore(x uint32) {
t.a = uint16(x >> 16)
t.b = uint16(x)
}
//go:noinline
func (t *T16) littleEndianStore(x uint32) {
t.a = uint16(x)
t.b = uint16(x >> 16)
}
type T8 struct {
a, b uint8
}
//go:noinline
func (t *T8) bigEndianLoad() uint16 {
return uint16(t.a)<<8 | uint16(t.b)
}
//go:noinline
func (t *T8) littleEndianLoad() uint16 {
return uint16(t.a) | (uint16(t.b) << 8)
}
//go:noinline
func (t *T8) bigEndianStore(x uint16) {
t.a = uint8(x >> 8)
t.b = uint8(x)
}
//go:noinline
func (t *T8) littleEndianStore(x uint16) {
t.a = uint8(x)
t.b = uint8(x >> 8)
}
func TestIssue64468(t *testing.T) {
t32 := T32{1, 2}
if got, want := t32.bigEndianLoad(), uint64(1<<32+2); got != want {
t.Errorf("T32.bigEndianLoad got %x want %x\n", got, want)
}
if got, want := t32.littleEndianLoad(), uint64(1+2<<32); got != want {
t.Errorf("T32.littleEndianLoad got %x want %x\n", got, want)
}
t16 := T16{1, 2}
if got, want := t16.bigEndianLoad(), uint32(1<<16+2); got != want {
t.Errorf("T16.bigEndianLoad got %x want %x\n", got, want)
}
if got, want := t16.littleEndianLoad(), uint32(1+2<<16); got != want {
t.Errorf("T16.littleEndianLoad got %x want %x\n", got, want)
}
t8 := T8{1, 2}
if got, want := t8.bigEndianLoad(), uint16(1<<8+2); got != want {
t.Errorf("T8.bigEndianLoad got %x want %x\n", got, want)
}
if got, want := t8.littleEndianLoad(), uint16(1+2<<8); got != want {
t.Errorf("T8.littleEndianLoad got %x want %x\n", got, want)
}
t32.bigEndianStore(1<<32 + 2)
if got, want := t32, (T32{1, 2}); got != want {
t.Errorf("T32.bigEndianStore got %x want %x\n", got, want)
}
t32.littleEndianStore(1<<32 + 2)
if got, want := t32, (T32{2, 1}); got != want {
t.Errorf("T32.littleEndianStore got %x want %x\n", got, want)
}
t16.bigEndianStore(1<<16 + 2)
if got, want := t16, (T16{1, 2}); got != want {
t.Errorf("T16.bigEndianStore got %x want %x\n", got, want)
}
t16.littleEndianStore(1<<16 + 2)
if got, want := t16, (T16{2, 1}); got != want {
t.Errorf("T16.littleEndianStore got %x want %x\n", got, want)
}
t8.bigEndianStore(1<<8 + 2)
if got, want := t8, (T8{1, 2}); got != want {
t.Errorf("T8.bigEndianStore got %x want %x\n", got, want)
}
t8.littleEndianStore(1<<8 + 2)
if got, want := t8, (T8{2, 1}); got != want {
t.Errorf("T8.littleEndianStore got %x want %x\n", got, want)
}
}

View File

@ -882,3 +882,40 @@ func wideStore2(p *[8]uint64, x, y uint64) {
// s390x:-"STMG",-"MOVD"
p[1] = y
}
func store32le(p *struct{ a, b uint32 }, x uint64) {
// amd64:"MOVQ",-"MOVL",-"SHRQ"
// arm64:"MOVD",-"MOVW",-"LSR"
// ppc64le:"MOVD",-"MOVW",-"SRD"
p.a = uint32(x)
// amd64:-"MOVL",-"SHRQ"
// arm64:-"MOVW",-"LSR"
// ppc64le:-"MOVW",-"SRD"
p.b = uint32(x >> 32)
}
func store32be(p *struct{ a, b uint32 }, x uint64) {
// ppc64:"MOVD",-"MOVW",-"SRD"
// s390x:"MOVD",-"MOVW",-"SRD"
p.a = uint32(x >> 32)
// ppc64:-"MOVW",-"SRD"
// s390x:-"MOVW",-"SRD"
p.b = uint32(x)
}
func store16le(p *struct{ a, b uint16 }, x uint32) {
// amd64:"MOVL",-"MOVW",-"SHRL"
// arm64:"MOVW",-"MOVH",-"UBFX"
// ppc64le:"MOVW",-"MOVH",-"SRW"
p.a = uint16(x)
// amd64:-"MOVW",-"SHRL"
// arm64:-"MOVH",-"UBFX"
// ppc64le:-"MOVH",-"SRW"
p.b = uint16(x >> 16)
}
func store16be(p *struct{ a, b uint16 }, x uint32) {
// ppc64:"MOVW",-"MOVH",-"SRW"
// s390x:"MOVW",-"MOVH",-"SRW"
p.a = uint16(x >> 16)
// ppc64:-"MOVH",-"SRW"
// s390x:-"MOVH",-"SRW"
p.b = uint16(x)
}