[dev.ssa] cmd/compile: lots of small rewrite optimizations

Small optimizations I noticed while looking at Giovanni's test cases.

More shifts by constants.
Indexed stores for smaller types.
Fold LEA into loads/stores.
More extending loads.
CMP $0 of AND -> TEST

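Fix operand order of the TEST-with-constant ops (the constant is now emitted as the source operand and the register as the destination).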

Giovanni's test cases at https://gist.github.com/rasky/62fba94e3a20d1b05b2a

Change-Id: I7077bc0b5319bf05767eeb39f401f4bb4b39f635
Reviewed-on: https://go-review.googlesource.com/19086
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Keith Randall 2016-01-30 11:25:38 -08:00
parent f962f33035
commit 1cc5789df9
6 changed files with 1969 additions and 13 deletions

View File

@ -4003,13 +4003,18 @@ func (s *genState) genValue(v *ssa.Value) {
// Go assembler has swapped operands for UCOMISx relative to CMP, // Go assembler has swapped operands for UCOMISx relative to CMP,
// must account for that right here. // must account for that right here.
opregreg(v.Op.Asm(), regnum(v.Args[0]), regnum(v.Args[1])) opregreg(v.Op.Asm(), regnum(v.Args[0]), regnum(v.Args[1]))
case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst, case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[0]) p.From.Reg = regnum(v.Args[0])
p.To.Type = obj.TYPE_CONST p.To.Type = obj.TYPE_CONST
p.To.Offset = v.AuxInt p.To.Offset = v.AuxInt
case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v.Args[0])
case ssa.OpAMD64MOVBconst, ssa.OpAMD64MOVWconst, ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst: case ssa.OpAMD64MOVBconst, ssa.OpAMD64MOVWconst, ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
x := regnum(v) x := regnum(v)
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
@ -4040,7 +4045,7 @@ func (s *genState) genValue(v *ssa.Value) {
p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.From.Val = math.Float64frombits(uint64(v.AuxInt))
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = x p.To.Reg = x
case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVBQZXload, ssa.OpAMD64MOVOload: case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVBQZXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVWQZXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVLQZXload, ssa.OpAMD64MOVOload:
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
p.From.Reg = regnum(v.Args[0]) p.From.Reg = regnum(v.Args[0])
@ -4081,7 +4086,7 @@ func (s *genState) genValue(v *ssa.Value) {
p.To.Scale = 8 p.To.Scale = 8
p.To.Index = regnum(v.Args[1]) p.To.Index = regnum(v.Args[1])
addAux(&p.To, v) addAux(&p.To, v)
case ssa.OpAMD64MOVSSstoreidx4: case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[2]) p.From.Reg = regnum(v.Args[2])
@ -4090,6 +4095,24 @@ func (s *genState) genValue(v *ssa.Value) {
p.To.Scale = 4 p.To.Scale = 4
p.To.Index = regnum(v.Args[1]) p.To.Index = regnum(v.Args[1])
addAux(&p.To, v) addAux(&p.To, v)
case ssa.OpAMD64MOVWstoreidx2:
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[2])
p.To.Type = obj.TYPE_MEM
p.To.Reg = regnum(v.Args[0])
p.To.Scale = 2
p.To.Index = regnum(v.Args[1])
addAux(&p.To, v)
case ssa.OpAMD64MOVBstoreidx1:
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[2])
p.To.Type = obj.TYPE_MEM
p.To.Reg = regnum(v.Args[0])
p.To.Scale = 1
p.To.Index = regnum(v.Args[1])
addAux(&p.To, v)
case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
@ -4365,7 +4388,9 @@ func (s *genState) genValue(v *ssa.Value) {
} }
switch w.Op { switch w.Op {
case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore: ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVBQZXload, ssa.OpAMD64MOVWQSXload,
ssa.OpAMD64MOVWQZXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVLQZXload:
if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
if Debug_checknil != 0 && int(v.Line) > 1 { if Debug_checknil != 0 && int(v.Line) > 1 {
Warnl(int(v.Line), "removed nil check") Warnl(int(v.Line), "removed nil check")

View File

@ -51,6 +51,8 @@ Optimizations (better compiled code)
Note that this is challenging for ops that generate flags Note that this is challenging for ops that generate flags
because flagalloc wants to move those instructions around for because flagalloc wants to move those instructions around for
flag regeneration. flag regeneration.
- In forms like if ... { call } else { no call }, mark the call branch as unlikely.
- Non-constant rotate detection.
Optimizations (better compiler) Optimizations (better compiler)
------------------------------- -------------------------------

View File

@ -464,18 +464,63 @@
(XORB (MOVBconst [c]) x) -> (XORBconst [c] x) (XORB (MOVBconst [c]) x) -> (XORBconst [c] x)
(SHLQ x (MOVQconst [c])) -> (SHLQconst [c&63] x) (SHLQ x (MOVQconst [c])) -> (SHLQconst [c&63] x)
(SHLQ x (MOVLconst [c])) -> (SHLQconst [c&63] x)
(SHLQ x (MOVWconst [c])) -> (SHLQconst [c&63] x)
(SHLQ x (MOVBconst [c])) -> (SHLQconst [c&63] x)
(SHLL x (MOVQconst [c])) -> (SHLLconst [c&31] x)
(SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x) (SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x)
(SHLL x (MOVWconst [c])) -> (SHLLconst [c&31] x)
(SHLL x (MOVBconst [c])) -> (SHLLconst [c&31] x)
(SHLW x (MOVQconst [c])) -> (SHLWconst [c&31] x)
(SHLW x (MOVLconst [c])) -> (SHLWconst [c&31] x)
(SHLW x (MOVWconst [c])) -> (SHLWconst [c&31] x) (SHLW x (MOVWconst [c])) -> (SHLWconst [c&31] x)
(SHLW x (MOVBconst [c])) -> (SHLWconst [c&31] x)
(SHLB x (MOVQconst [c])) -> (SHLBconst [c&31] x)
(SHLB x (MOVLconst [c])) -> (SHLBconst [c&31] x)
(SHLB x (MOVWconst [c])) -> (SHLBconst [c&31] x)
(SHLB x (MOVBconst [c])) -> (SHLBconst [c&31] x) (SHLB x (MOVBconst [c])) -> (SHLBconst [c&31] x)
(SHRQ x (MOVQconst [c])) -> (SHRQconst [c&63] x) (SHRQ x (MOVQconst [c])) -> (SHRQconst [c&63] x)
(SHRQ x (MOVLconst [c])) -> (SHRQconst [c&63] x)
(SHRQ x (MOVWconst [c])) -> (SHRQconst [c&63] x)
(SHRQ x (MOVBconst [c])) -> (SHRQconst [c&63] x)
(SHRL x (MOVQconst [c])) -> (SHRLconst [c&31] x)
(SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x) (SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x)
(SHRL x (MOVWconst [c])) -> (SHRLconst [c&31] x)
(SHRL x (MOVBconst [c])) -> (SHRLconst [c&31] x)
(SHRW x (MOVQconst [c])) -> (SHRWconst [c&31] x)
(SHRW x (MOVLconst [c])) -> (SHRWconst [c&31] x)
(SHRW x (MOVWconst [c])) -> (SHRWconst [c&31] x) (SHRW x (MOVWconst [c])) -> (SHRWconst [c&31] x)
(SHRW x (MOVBconst [c])) -> (SHRWconst [c&31] x)
(SHRB x (MOVQconst [c])) -> (SHRBconst [c&31] x)
(SHRB x (MOVLconst [c])) -> (SHRBconst [c&31] x)
(SHRB x (MOVWconst [c])) -> (SHRBconst [c&31] x)
(SHRB x (MOVBconst [c])) -> (SHRBconst [c&31] x) (SHRB x (MOVBconst [c])) -> (SHRBconst [c&31] x)
(SARQ x (MOVQconst [c])) -> (SARQconst [c&63] x) (SARQ x (MOVQconst [c])) -> (SARQconst [c&63] x)
(SARQ x (MOVLconst [c])) -> (SARQconst [c&63] x)
(SARQ x (MOVWconst [c])) -> (SARQconst [c&63] x)
(SARQ x (MOVBconst [c])) -> (SARQconst [c&63] x)
(SARL x (MOVQconst [c])) -> (SARLconst [c&31] x)
(SARL x (MOVLconst [c])) -> (SARLconst [c&31] x) (SARL x (MOVLconst [c])) -> (SARLconst [c&31] x)
(SARL x (MOVWconst [c])) -> (SARLconst [c&31] x)
(SARL x (MOVBconst [c])) -> (SARLconst [c&31] x)
(SARW x (MOVQconst [c])) -> (SARWconst [c&31] x)
(SARW x (MOVLconst [c])) -> (SARWconst [c&31] x)
(SARW x (MOVWconst [c])) -> (SARWconst [c&31] x) (SARW x (MOVWconst [c])) -> (SARWconst [c&31] x)
(SARW x (MOVBconst [c])) -> (SARWconst [c&31] x)
(SARB x (MOVQconst [c])) -> (SARBconst [c&31] x)
(SARB x (MOVLconst [c])) -> (SARBconst [c&31] x)
(SARB x (MOVWconst [c])) -> (SARBconst [c&31] x)
(SARB x (MOVBconst [c])) -> (SARBconst [c&31] x) (SARB x (MOVBconst [c])) -> (SARBconst [c&31] x)
// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits) // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
@ -524,7 +569,18 @@
// multiple memory values alive simultaneously. // multiple memory values alive simultaneously.
(MOVBQSX (MOVBload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) (MOVBQSX (MOVBload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQZX (MOVBload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVBQZXload <v.Type> [off] {sym} ptr mem) (MOVBQZX (MOVBload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVBQZXload <v.Type> [off] {sym} ptr mem)
// TODO: more (MOVWQSX (MOVWload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQZX (MOVWload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVWQZXload <v.Type> [off] {sym} ptr mem)
(MOVLQSX (MOVLload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
(MOVLQZX (MOVLload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVLQZXload <v.Type> [off] {sym} ptr mem)
// Fold extensions and ANDs together.
// The result is a 64-bit AND with a constant mask. The 64-bit AND takes a
// 32-bit immediate that is sign-extended, so the folded mask must never have
// bit 31 set: otherwise the upper 32 bits of x would be preserved instead of
// cleared. The byte and word zero-extension masks (<= 0xffff) are always safe;
// the 32-bit zero extension needs an explicit guard, just like the
// sign-extension rules below.
(MOVBQZX (ANDBconst [c] x)) -> (ANDQconst [c & 0xff] x)
(MOVWQZX (ANDWconst [c] x)) -> (ANDQconst [c & 0xffff] x)
(MOVLQZX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDQconst [c & 0x7fffffff] x)
// Sign extension of an AND result is only a plain mask when the sign bit of
// the narrow value is known to be zero.
(MOVBQSX (ANDBconst [c] x)) && c & 0x80 == 0 -> (ANDQconst [c & 0x7f] x)
(MOVWQSX (ANDWconst [c] x)) && c & 0x8000 == 0 -> (ANDQconst [c & 0x7fff] x)
(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDQconst [c & 0x7fffffff] x)
// Don't extend before storing // Don't extend before storing
(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem) (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
@ -623,22 +679,63 @@
(MOVSSstoreidx4 [off1] {sym} (ADDQconst [off2] {sym} ptr) idx val mem) -> (MOVSSstoreidx4 [addOff(off1, off2)] {sym} ptr idx val mem) (MOVSSstoreidx4 [off1] {sym} (ADDQconst [off2] {sym} ptr) idx val mem) -> (MOVSSstoreidx4 [addOff(off1, off2)] {sym} ptr idx val mem)
(MOVSDloadidx8 [off1] {sym} (ADDQconst [off2] {sym} ptr) idx mem) -> (MOVSDloadidx8 [addOff(off1, off2)] {sym} ptr idx mem) (MOVSDloadidx8 [off1] {sym} (ADDQconst [off2] {sym} ptr) idx mem) -> (MOVSDloadidx8 [addOff(off1, off2)] {sym} ptr idx mem)
(MOVSDstoreidx8 [off1] {sym} (ADDQconst [off2] {sym} ptr) idx val mem) -> (MOVSDstoreidx8 [addOff(off1, off2)] {sym} ptr idx val mem) (MOVSDstoreidx8 [off1] {sym} (ADDQconst [off2] {sym} ptr) idx val mem) -> (MOVSDstoreidx8 [addOff(off1, off2)] {sym} ptr idx val mem)
(MOVLstoreidx4 [off1] {sym} (ADDQconst [off2] ptr) idx val mem) -> (MOVLstoreidx4 [addOff(off1, off2)] {sym} ptr idx val mem)
(MOVWstoreidx2 [off1] {sym} (ADDQconst [off2] ptr) idx val mem) -> (MOVWstoreidx2 [addOff(off1, off2)] {sym} ptr idx val mem)
(MOVBstoreidx1 [off1] {sym} (ADDQconst [off2] ptr) idx val mem) -> (MOVBstoreidx1 [addOff(off1, off2)] {sym} ptr idx val mem)
(MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> (MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVQloadidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem) (MOVQloadidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVQstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> (MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVSSloadidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem) (MOVSSloadidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVSSstoreidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> (MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
(MOVSDloadidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem) (MOVSDloadidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVBstoreidx1 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVWstoreidx2 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVLstoreidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVQstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVSSstoreidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) -> (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
(MOVSDstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem) (MOVSDstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVBstore [off] {sym} (ADDQ ptr idx) val mem) -> (MOVBstoreidx1 [off] {sym} ptr idx val mem)
// fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && canMergeSym(sym1, sym2) ->
(LEAQ [addOff(off1,off2)] {mergeSym(sym1,sym2)} x)
// LEAQ into LEAQ1
(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAQ1 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
(LEAQ1 [off1] {sym1} x (LEAQ [off2] {sym2} y)) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
(LEAQ1 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
// LEAQ1 into LEAQ
(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
(LEAQ1 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
// LEAQ into LEAQ[248]
(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAQ2 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAQ4 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAQ8 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
// LEAQ[248] into LEAQ
(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
(LEAQ2 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
(LEAQ4 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
(LEAQ8 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
// lower Zero instructions with word sizes // lower Zero instructions with word sizes
(Zero [0] _ mem) -> mem (Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem) (Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
@ -963,3 +1060,12 @@
(XORW x x) -> (MOVWconst [0]) (XORW x x) -> (MOVWconst [0])
(XORB x x) -> (MOVBconst [0]) (XORB x x) -> (MOVBconst [0])
// checking AND against 0.
(CMPQconst (ANDQ x y) [0]) -> (TESTQ x y)
(CMPLconst (ANDL x y) [0]) -> (TESTL x y)
(CMPWconst (ANDW x y) [0]) -> (TESTW x y)
(CMPBconst (ANDB x y) [0]) -> (TESTB x y)
(CMPQconst (ANDQconst [c] x) [0]) -> (TESTQconst [c] x)
(CMPLconst (ANDLconst [c] x) [0]) -> (TESTLconst [c] x)
(CMPWconst (ANDWconst [c] x) [0]) -> (TESTWconst [c] x)
(CMPBconst (ANDBconst [c] x) [0]) -> (TESTBconst [c] x)

View File

@ -368,14 +368,22 @@ func init() {
{name: "MOVBQSXload", reg: gpload, asm: "MOVBQSX"}, // ditto, extend to int64 {name: "MOVBQSXload", reg: gpload, asm: "MOVBQSX"}, // ditto, extend to int64
{name: "MOVBQZXload", reg: gpload, asm: "MOVBQZX"}, // ditto, extend to uint64 {name: "MOVBQZXload", reg: gpload, asm: "MOVBQZX"}, // ditto, extend to uint64
{name: "MOVWload", reg: gpload, asm: "MOVW", typ: "UInt16"}, // load 2 bytes from arg0+auxint+aux. arg1=mem {name: "MOVWload", reg: gpload, asm: "MOVW", typ: "UInt16"}, // load 2 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVWQSXload", reg: gpload, asm: "MOVWQSX"}, // ditto, extend to int64
{name: "MOVWQZXload", reg: gpload, asm: "MOVWQZX"}, // ditto, extend to uint64
{name: "MOVLload", reg: gpload, asm: "MOVL", typ: "UInt32"}, // load 4 bytes from arg0+auxint+aux. arg1=mem {name: "MOVLload", reg: gpload, asm: "MOVL", typ: "UInt32"}, // load 4 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVLQSXload", reg: gpload, asm: "MOVLQSX"}, // ditto, extend to int64
{name: "MOVLQZXload", reg: gpload, asm: "MOVLQZX"}, // ditto, extend to uint64
{name: "MOVQload", reg: gpload, asm: "MOVQ", typ: "UInt64"}, // load 8 bytes from arg0+auxint+aux. arg1=mem {name: "MOVQload", reg: gpload, asm: "MOVQ", typ: "UInt64"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVQloadidx8", reg: gploadidx, asm: "MOVQ"}, // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem {name: "MOVQloadidx8", reg: gploadidx, asm: "MOVQ"}, // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
{name: "MOVBstore", reg: gpstore, asm: "MOVB", typ: "Mem"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVBstore", reg: gpstore, asm: "MOVB", typ: "Mem"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVWstore", reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVLstore", reg: gpstore, asm: "MOVL", typ: "Mem"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVLstore", reg: gpstore, asm: "MOVL", typ: "Mem"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstore", reg: gpstore, asm: "MOVQ", typ: "Mem"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVQstore", reg: gpstore, asm: "MOVQ", typ: "Mem"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstoreidx8", reg: gpstoreidx, asm: "MOVQ"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
{name: "MOVBstoreidx1", reg: gpstoreidx, asm: "MOVB"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVWstoreidx2", reg: gpstoreidx, asm: "MOVW"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
{name: "MOVLstoreidx4", reg: gpstoreidx, asm: "MOVL"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
{name: "MOVQstoreidx8", reg: gpstoreidx, asm: "MOVQ"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
{name: "MOVOload", reg: fpload, asm: "MOVUPS", typ: "Int128"}, // load 16 bytes from arg0+auxint+aux. arg1=mem {name: "MOVOload", reg: fpload, asm: "MOVUPS", typ: "Int128"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVOstore", reg: fpstore, asm: "MOVUPS", typ: "Mem"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVOstore", reg: fpstore, asm: "MOVUPS", typ: "Mem"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem

View File

@ -254,13 +254,20 @@ const (
OpAMD64MOVBQSXload OpAMD64MOVBQSXload
OpAMD64MOVBQZXload OpAMD64MOVBQZXload
OpAMD64MOVWload OpAMD64MOVWload
OpAMD64MOVWQSXload
OpAMD64MOVWQZXload
OpAMD64MOVLload OpAMD64MOVLload
OpAMD64MOVLQSXload
OpAMD64MOVLQZXload
OpAMD64MOVQload OpAMD64MOVQload
OpAMD64MOVQloadidx8 OpAMD64MOVQloadidx8
OpAMD64MOVBstore OpAMD64MOVBstore
OpAMD64MOVWstore OpAMD64MOVWstore
OpAMD64MOVLstore OpAMD64MOVLstore
OpAMD64MOVQstore OpAMD64MOVQstore
OpAMD64MOVBstoreidx1
OpAMD64MOVWstoreidx2
OpAMD64MOVLstoreidx4
OpAMD64MOVQstoreidx8 OpAMD64MOVQstoreidx8
OpAMD64MOVOload OpAMD64MOVOload
OpAMD64MOVOstore OpAMD64MOVOstore
@ -2966,6 +2973,30 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVWQSXload",
asm: x86.AMOVWQSX,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
},
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
},
},
{
name: "MOVWQZXload",
asm: x86.AMOVWQZX,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
},
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
},
},
{ {
name: "MOVLload", name: "MOVLload",
asm: x86.AMOVL, asm: x86.AMOVL,
@ -2978,6 +3009,30 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVLQSXload",
asm: x86.AMOVLQSX,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
},
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
},
},
{
name: "MOVLQZXload",
asm: x86.AMOVLQZX,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
},
outputs: []regMask{
65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
},
},
},
{ {
name: "MOVQload", name: "MOVQload",
asm: x86.AMOVQ, asm: x86.AMOVQ,
@ -3043,6 +3098,39 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVBstoreidx1",
asm: x86.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
{2, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
},
},
},
{
name: "MOVWstoreidx2",
asm: x86.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
{2, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
},
},
},
{
name: "MOVLstoreidx4",
asm: x86.AMOVL,
reg: regInfo{
inputs: []inputInfo{
{1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
{2, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
},
},
},
{ {
name: "MOVQstoreidx8", name: "MOVQstoreidx8",
asm: x86.AMOVQ, asm: x86.AMOVQ,

File diff suppressed because it is too large Load Diff