mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
cmd/compile: match more patterns for shortcircuit
This CL tries to generalize the pattern matching of certain shortcircuit-able CFGs a bit more: For a shortcircuit-able CFG: p q \ / b / \ t u Where the constant branch is t, and b has multiple phi values other than the control phi. For the non-constant branch target u, we try to match the "diamond" shape CFG: p q \ / b / \ t u \ / m or p q \ / b |\ | u |/ m Instead of matching u as a single block, we know try to generalize it as a subgraph that satisfy condition: it's a DAG that has a single entry point u, and has a path to m. compilebench stats: │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Template 109.4m ± 3% 109.8m ± 3% ~ (p=0.796 n=10) Unicode 94.23m ± 1% 93.85m ± 1% ~ (p=0.631 n=10) GoTypes 538.2m ± 1% 538.8m ± 1% ~ (p=0.912 n=10) Compiler 90.21m ± 1% 90.02m ± 1% ~ (p=0.436 n=10) SSA 3.318 ± 1% 3.323 ± 1% ~ (p=1.000 n=10) Flate 69.38m ± 1% 69.57m ± 2% ~ (p=0.529 n=10) GoParser 128.5m ± 1% 127.4m ± 1% ~ (p=0.075 n=10) Reflect 267.4m ± 1% 267.2m ± 2% ~ (p=0.739 n=10) Tar 127.7m ± 2% 126.4m ± 1% ~ (p=0.353 n=10) XML 149.5m ± 1% 149.6m ± 2% ~ (p=0.684 n=10) LinkCompiler 390.0m ± 1% 388.4m ± 2% ~ (p=0.353 n=10) ExternalLinkCompiler 1.296 ± 0% 1.296 ± 1% ~ (p=0.971 n=10) LinkWithoutDebugCompiler 226.3m ± 1% 225.5m ± 1% ~ (p=0.393 n=10) StdCmd 13.26 ± 0% 13.25 ± 1% ~ (p=0.529 n=10) geomean 319.3m 318.8m -0.17% │ old.txt │ new.txt │ │ user-sec/op │ user-sec/op vs base │ Template 293.1m ± 3% 291.4m ± 11% ~ (p=0.436 n=10) Unicode 91.09m ± 5% 87.61m ± 7% ~ (p=0.165 n=10) GoTypes 1.932 ± 3% 1.926 ± 3% ~ (p=0.739 n=10) Compiler 125.8m ± 3% 121.5m ± 10% ~ (p=0.481 n=10) SSA 18.93 ± 3% 18.89 ± 1% ~ (p=0.684 n=10) Flate 158.5m ± 5% 160.0m ± 7% ~ (p=0.971 n=10) GoParser 316.0m ± 9% 327.4m ± 7% ~ (p=0.052 n=10) Reflect 845.6m ± 6% 861.6m ± 3% ~ (p=0.579 n=10) Tar 358.1m ± 5% 348.5m ± 4% ~ (p=0.089 n=10) XML 382.4m ± 4% 392.2m ± 3% ~ (p=0.143 n=10) LinkCompiler 609.1m ± 4% 627.9m ± 3% ~ (p=0.123 n=10) ExternalLinkCompiler 1.336 ± 2% 1.343 ± 4% ~ (p=0.565 n=10) LinkWithoutDebugCompiler 248.7m ± 3% 248.0m ± 1% ~ (p=0.853 n=10) geomean 506.4m 506.8m +0.08% │ old.txt │ new.txt │ │ text-bytes │ text-bytes vs base │ HelloSize 965.8Ki ± 0% 965.0Ki ± 0% -0.08% (p=0.000 n=10) CmdGoSize 12.30Mi ± 0% 12.29Mi ± 0% -0.08% (p=0.000 n=10) geomean 3.406Mi 3.403Mi -0.08% │ old.txt │ new.txt │ │ data-bytes │ data-bytes vs base │ HelloSize 15.08Ki ± 0% 15.08Ki ± 0% ~ (p=1.000 n=10) ¹ CmdGoSize 408.5Ki ± 0% 408.5Ki ± 0% ~ (p=1.000 n=10) ¹ geomean 78.49Ki 78.49Ki +0.00% ¹ all samples are equal │ old.txt │ new.txt │ │ bss-bytes │ bss-bytes vs base │ HelloSize 142.0Ki ± 0% 142.0Ki ± 0% ~ (p=1.000 n=10) ¹ CmdGoSize 206.4Ki ± 0% 206.4Ki ± 0% ~ (p=1.000 n=10) ¹ geomean 171.2Ki 171.2Ki +0.00% ¹ all samples are equal │ old.txt │ new.txt │ │ exe-bytes │ exe-bytes vs base │ HelloSize 1.466Mi ± 0% 1.462Mi ± 0% -0.27% (p=0.000 n=10) CmdGoSize 18.19Mi ± 0% 18.17Mi ± 0% -0.10% (p=0.000 n=10) geomean 5.164Mi 5.154Mi -0.18% Fixes #72132 Change-Id: I3d1cb10b6a158c5750adc23c79709d63dbd771f3 Reviewed-on: https://go-review.googlesource.com/c/go/+/656255 Auto-Submit: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
parent
b3aff930cf
commit
4d6722a8fd
@ -135,6 +135,8 @@ func shortcircuitBlock(b *Block) bool {
|
||||
// to reason about the values of phis.
|
||||
return false
|
||||
}
|
||||
// We only process blocks with only phi values except for control
|
||||
// value and its wrappers.
|
||||
if len(b.Values) != nval+nOtherPhi {
|
||||
return false
|
||||
}
|
||||
@ -293,78 +295,85 @@ func shortcircuitPhiPlan(b *Block, ctl *Value, cidx int, ti int64) func(*Value,
|
||||
// In these cases, we can reconstruct what the value
|
||||
// of any phi in b must be in the successor blocks.
|
||||
|
||||
if len(t.Preds) == 1 && len(t.Succs) == 1 &&
|
||||
len(u.Preds) == 1 && len(u.Succs) == 1 &&
|
||||
t.Succs[0].b == u.Succs[0].b && len(t.Succs[0].b.Preds) == 2 {
|
||||
// p q
|
||||
// \ /
|
||||
// b
|
||||
// / \
|
||||
// t u
|
||||
// \ /
|
||||
// m
|
||||
//
|
||||
// After the CFG modifications, this will look like
|
||||
//
|
||||
// p q
|
||||
// | /
|
||||
// | b
|
||||
// |/ \
|
||||
// t u
|
||||
// \ /
|
||||
// m
|
||||
//
|
||||
// NB: t.Preds is (b, p), not (p, b).
|
||||
if len(t.Preds) == 1 && len(t.Succs) == 1 && len(u.Preds) == 1 &&
|
||||
len(t.Succs[0].b.Preds) == 2 {
|
||||
m := t.Succs[0].b
|
||||
return func(v *Value, i int) {
|
||||
// Replace any uses of v in t and u with the value v must have,
|
||||
// given that we have arrived at that block.
|
||||
// Then move v to m and adjust its value accordingly;
|
||||
// this handles all other uses of v.
|
||||
argP, argQ := v.Args[cidx], v.Args[1^cidx]
|
||||
u.replaceUses(v, argQ)
|
||||
phi := t.Func.newValue(OpPhi, v.Type, t, v.Pos)
|
||||
phi.AddArg2(argQ, argP)
|
||||
t.replaceUses(v, phi)
|
||||
if v.Uses == 0 {
|
||||
return
|
||||
}
|
||||
v.moveTo(m, i)
|
||||
// The phi in m belongs to whichever pred idx corresponds to t.
|
||||
if m.Preds[0].b == t {
|
||||
v.SetArgs2(phi, argQ)
|
||||
} else {
|
||||
v.SetArgs2(argQ, phi)
|
||||
if visited := u.flowsTo(m, 5); visited != nil {
|
||||
// p q
|
||||
// \ /
|
||||
// b
|
||||
// / \
|
||||
// t U (sub graph that satisfy condition in flowsTo)
|
||||
// \ /
|
||||
// m
|
||||
//
|
||||
// After the CFG modifications, this will look like
|
||||
//
|
||||
// p q
|
||||
// | /
|
||||
// | b
|
||||
// |/ \
|
||||
// t U
|
||||
// \ /
|
||||
// m
|
||||
//
|
||||
// NB: t.Preds is (b, p), not (p, b).
|
||||
return func(v *Value, i int) {
|
||||
// Replace any uses of v in t and u with the value v must have,
|
||||
// given that we have arrived at that block.
|
||||
// Then move v to m and adjust its value accordingly;
|
||||
// this handles all other uses of v.
|
||||
argP, argQ := v.Args[cidx], v.Args[1^cidx]
|
||||
phi := t.Func.newValue(OpPhi, v.Type, t, v.Pos)
|
||||
phi.AddArg2(argQ, argP)
|
||||
t.replaceUses(v, phi)
|
||||
for bb := range visited {
|
||||
bb.replaceUses(v, argQ)
|
||||
}
|
||||
if v.Uses == 0 {
|
||||
return
|
||||
}
|
||||
v.moveTo(m, i)
|
||||
// The phi in m belongs to whichever pred idx corresponds to t.
|
||||
if m.Preds[0].b == t {
|
||||
v.SetArgs2(phi, argQ)
|
||||
} else {
|
||||
v.SetArgs2(argQ, phi)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(t.Preds) == 2 && len(u.Preds) == 1 && len(u.Succs) == 1 && u.Succs[0].b == t {
|
||||
// p q
|
||||
// \ /
|
||||
// b
|
||||
// |\
|
||||
// | u
|
||||
// |/
|
||||
// t
|
||||
//
|
||||
// After the CFG modifications, this will look like
|
||||
//
|
||||
// q
|
||||
// /
|
||||
// b
|
||||
// |\
|
||||
// p | u
|
||||
// \|/
|
||||
// t
|
||||
//
|
||||
// NB: t.Preds is (b or u, b or u, p).
|
||||
return func(v *Value, i int) {
|
||||
// Replace any uses of v in u. Then move v to t.
|
||||
argP, argQ := v.Args[cidx], v.Args[1^cidx]
|
||||
u.replaceUses(v, argQ)
|
||||
v.moveTo(t, i)
|
||||
v.SetArgs3(argQ, argQ, argP)
|
||||
if len(t.Preds) == 2 && len(u.Preds) == 1 {
|
||||
if visited := u.flowsTo(t, 5); visited != nil {
|
||||
// p q
|
||||
// \ /
|
||||
// b
|
||||
// |\
|
||||
// | U ((sub graph that satisfy condition in flowsTo))
|
||||
// |/
|
||||
// t
|
||||
//
|
||||
// After the CFG modifications, this will look like
|
||||
//
|
||||
// q
|
||||
// /
|
||||
// b
|
||||
// |\
|
||||
// p | U
|
||||
// \|/
|
||||
// t
|
||||
//
|
||||
// NB: t.Preds is (b or U, b or U, p).
|
||||
return func(v *Value, i int) {
|
||||
// Replace any uses of v in U. Then move v to t.
|
||||
argP, argQ := v.Args[cidx], v.Args[1^cidx]
|
||||
for bb := range visited {
|
||||
bb.replaceUses(v, argQ)
|
||||
}
|
||||
v.moveTo(t, i)
|
||||
v.SetArgs3(argQ, argQ, argP)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -511,3 +520,77 @@ func (v *Value) moveTo(dst *Block, i int) {
|
||||
src.Values[last] = nil
|
||||
src.Values = src.Values[:last]
|
||||
}
|
||||
|
||||
// flowsTo checks that the subgraph starting from v and ends at t is a DAG, with
|
||||
// the following constraints:
|
||||
//
|
||||
// (1) v can reach t.
|
||||
// (2) v's connected component removing the paths containing t is a DAG.
|
||||
// (3) The blocks in the subgraph G defined in (2) has all their preds also in G,
|
||||
// except v.
|
||||
// (4) The subgraph defined in (2) has a size smaller than cap.
|
||||
//
|
||||
// We know that the subgraph G defined in constraint (2)(3) has the property that v
|
||||
// dominates all the blocks in G:
|
||||
// If there exist a block x in G that is not dominated by v, then there exist a
|
||||
// path P from entry to x that does not contain v. Denote x's predecessor in P
|
||||
// as x', then x' must also be in G given constraint (3), same to its pred x''
|
||||
// in P. Given constraint (2), by going back in P we will in the end reach v,
|
||||
// which conflicts with the definition of P.
|
||||
//
|
||||
// Constraint (2)'s DAG requirement could be further relaxed to contain "internal"
|
||||
// loops that doesn't change the dominance relation of v. But that is more subtle
|
||||
// and requires another constraint on the source block v, and a more complex proof.
|
||||
// Furthermore optimizing the branch guarding a loop might bring less gains as the
|
||||
// loop itself might be the bottleneck.
|
||||
func (v *Block) flowsTo(t *Block, cap int) map[*Block]struct{} {
|
||||
seen := map[*Block]struct{}{}
|
||||
var boundedDFS func(b *Block)
|
||||
hasPathToT := false
|
||||
fullyExplored := true
|
||||
isDAG := true
|
||||
visited := map[*Block]struct{}{}
|
||||
boundedDFS = func(b *Block) {
|
||||
if _, ok := seen[b]; ok {
|
||||
return
|
||||
}
|
||||
if _, ok := visited[b]; ok {
|
||||
isDAG = false
|
||||
return
|
||||
}
|
||||
if b == t {
|
||||
// do not put t into seen, this way
|
||||
// if v can reach t's connected component without going through t,
|
||||
// it will fail the pred check after boundedDFSUntil.
|
||||
hasPathToT = true
|
||||
return
|
||||
}
|
||||
if len(seen) > cap {
|
||||
fullyExplored = false
|
||||
return
|
||||
}
|
||||
seen[b] = struct{}{}
|
||||
visited[b] = struct{}{}
|
||||
for _, se := range b.Succs {
|
||||
boundedDFS(se.b)
|
||||
if !(isDAG && fullyExplored) {
|
||||
return
|
||||
}
|
||||
}
|
||||
delete(visited, b)
|
||||
}
|
||||
boundedDFS(v)
|
||||
if hasPathToT && fullyExplored && isDAG {
|
||||
for b := range seen {
|
||||
if b != v {
|
||||
for _, se := range b.Preds {
|
||||
if _, ok := seen[se.b]; !ok {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return seen
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user