cmd/compile: modify CSE to remove redundant OpLocalAddrs

Remove the unnecessary OpLocalAddrs in the CSE pass, so that the
following passes, such as DSE and memcombine, can do their work better.

Fixes #70300

Change-Id: I600025d49eeadb3ca4f092d614428399750f69bc
Reviewed-on: https://go-review.googlesource.com/c/go/+/628075
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
Auto-Submit: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Youlin Feng 2024-11-15 00:13:34 +08:00 committed by Gopher Robot
parent 0edea47f26
commit c4e6ab9750
3 changed files with 33058 additions and 16484 deletions

View File

@ -35,6 +35,8 @@ func cse(f *Func) {
a := f.Cache.allocValueSlice(f.NumValues()) a := f.Cache.allocValueSlice(f.NumValues())
defer func() { f.Cache.freeValueSlice(a) }() // inside closure to use final value of a defer func() { f.Cache.freeValueSlice(a) }() // inside closure to use final value of a
a = a[:0] a = a[:0]
o := f.Cache.allocInt32Slice(f.NumValues()) // the ordering score for stores
defer func() { f.Cache.freeInt32Slice(o) }()
if f.auxmap == nil { if f.auxmap == nil {
f.auxmap = auxmap{} f.auxmap = auxmap{}
} }
@ -125,6 +127,9 @@ func cse(f *Func) {
// Note: commutative args already correctly ordered by byArgClass. // Note: commutative args already correctly ordered by byArgClass.
eqArgs := true eqArgs := true
for k, a := range v.Args { for k, a := range v.Args {
if v.Op == OpLocalAddr && k == 1 {
continue
}
b := w.Args[k] b := w.Args[k]
if valueEqClass[a.ID] != valueEqClass[b.ID] { if valueEqClass[a.ID] != valueEqClass[b.ID] {
eqArgs = false eqArgs = false
@ -175,7 +180,35 @@ func cse(f *Func) {
defer f.Cache.freeValueSlice(rewrite) defer f.Cache.freeValueSlice(rewrite)
for _, e := range partition { for _, e := range partition {
slices.SortFunc(e, func(v, w *Value) int { slices.SortFunc(e, func(v, w *Value) int {
return cmp.Compare(sdom.domorder(v.Block), sdom.domorder(w.Block)) c := cmp.Compare(sdom.domorder(v.Block), sdom.domorder(w.Block))
if v.Op != OpLocalAddr || c != 0 {
return c
}
// compare the memory args for OpLocalAddrs in the same block
vm := v.Args[1]
wm := w.Args[1]
if vm == wm {
return 0
}
// if the two OpLocalAddrs are in the same block, and one's memory
// arg also in the same block, but the other one's memory arg not,
// the latter must be in an ancestor block
if vm.Block != v.Block {
return -1
}
if wm.Block != w.Block {
return +1
}
// use store order if the memory args are in the same block
vs := storeOrdering(vm, o)
ws := storeOrdering(wm, o)
if vs <= 0 {
f.Fatalf("unable to determine the order of %s", vm.LongString())
}
if ws <= 0 {
f.Fatalf("unable to determine the order of %s", wm.LongString())
}
return cmp.Compare(vs, ws)
}) })
for i := 0; i < len(e)-1; i++ { for i := 0; i < len(e)-1; i++ {
@ -241,6 +274,41 @@ func cse(f *Func) {
} }
} }
// storeOrdering returns the ordering score of the memory value v, computing
// and caching it on demand. It walks the store chain backwards from v through
// memory arguments that live in the same block and assigns a score to every
// store it visits, so that a store earlier in store order gets a lower score.
// Scores are only comparable between stores of the same block. cache is
// indexed by value ID; a zero entry is treated as "not scored yet", which
// ensures that each store is scored only once.
func storeOrdering(v *Value, cache []int32) int32 {
	const minScore int32 = 1

	// First pass: measure how far v is from the start of its chain. The walk
	// stops at a value that already has a score (which is added on top), at
	// a Phi or InitMem, or at a value whose memory argument is in another
	// block.
	top := minScore
	for cur := v; ; top++ {
		if known := cache[cur.ID]; known >= minScore {
			top += known
			break
		}
		if cur.Op == OpPhi || cur.Op == OpInitMem {
			break
		}
		prev := cur.MemoryArg()
		if prev.Block != cur.Block {
			break
		}
		cur = prev
	}

	// Second pass: walk the same chain again and hand out decreasing scores,
	// stopping at the first value that is already scored or once the lowest
	// score has been assigned.
	for cur := v; cache[cur.ID] == 0; cur = cur.MemoryArg() {
		cache[cur.ID] = top
		if top == minScore {
			break
		}
		top--
	}
	return cache[v.ID]
}
// An eqclass approximates an equivalence class. During the // An eqclass approximates an equivalence class. During the
// algorithm it may represent the union of several of the // algorithm it may represent the union of several of the
// final equivalence classes. // final equivalence classes.

View File

@ -944,3 +944,29 @@ func issue66413(p *struct {
p.c = true p.c = true
p.d = 12 p.d = 12
} }
// issue70300 returns the eight bytes of v in little-endian order, storing
// them one byte at a time from b[0] up to b[7].
// NOTE(review): the amd64 check comment below presumably asserts that the
// byte stores are combined into a single MOVQ with no MOVB left; confirm
// against the codegen test conventions.
func issue70300(v uint64) (b [8]byte) {
// amd64:"MOVQ",-"MOVB"
b[0] = byte(v)
b[1] = byte(v >> 8)
b[2] = byte(v >> 16)
b[3] = byte(v >> 24)
b[4] = byte(v >> 32)
b[5] = byte(v >> 40)
b[6] = byte(v >> 48)
b[7] = byte(v >> 56)
return b
}
// issue70300Reverse returns the eight bytes of v in little-endian order,
// like a byte-by-byte store of v into b, but performs the stores from b[7]
// down to b[0].
// NOTE(review): the amd64 check comment below presumably asserts that the
// byte stores are combined into a single MOVQ with no MOVB left, regardless
// of the store order; confirm against the codegen test conventions.
func issue70300Reverse(v uint64) (b [8]byte) {
// amd64:"MOVQ",-"MOVB"
b[7] = byte(v >> 56)
b[6] = byte(v >> 48)
b[5] = byte(v >> 40)
b[4] = byte(v >> 32)
b[3] = byte(v >> 24)
b[2] = byte(v >> 16)
b[1] = byte(v >> 8)
b[0] = byte(v)
return b
}

File diff suppressed because it is too large Load Diff