diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules index e92c58b7d8..6570e8a5a4 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM.rules @@ -798,6 +798,8 @@ // generic constant folding (ADDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (SUBconst [int64(int32(-c))] x) (SUBconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (ADDconst [int64(int32(-c))] x) +(ANDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) -> (BICconst [int64(^uint32(c))] x) +(BICconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) -> (ANDconst [int64(^uint32(c))] x) (ADDconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(c+d))]) (ADDconst [c] (ADDconst [d] x)) -> (ADDconst [int64(int32(c+d))] x) (ADDconst [c] (SUBconst [d] x)) -> (ADDconst [int64(int32(c-d))] x) diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go index 0ca4e41e4e..65e97e13d8 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM.go +++ b/src/cmd/compile/internal/ssa/rewriteARM.go @@ -3223,6 +3223,20 @@ func rewriteValueARM_OpARMANDconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ANDconst [c] x) + // cond: !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) + // result: (BICconst [int64(^uint32(c))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(!isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c))) { + break + } + v.reset(OpARMBICconst) + v.AuxInt = int64(^uint32(c)) + v.AddArg(x) + return true + } // match: (ANDconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [c&d]) @@ -3722,6 +3736,20 @@ func rewriteValueARM_OpARMBICconst_0(v *Value) bool { v.AuxInt = 0 return true } + // match: (BICconst [c] x) + // cond: !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) + // result: (ANDconst [int64(^uint32(c))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(!isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c))) { + break + } + v.reset(OpARMANDconst) + v.AuxInt = int64(^uint32(c)) + v.AddArg(x) + return true + } // match: (BICconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [d&^c]) diff --git a/src/cmd/internal/obj/arm/a.out.go b/src/cmd/internal/obj/arm/a.out.go index ea153a30da..6ea7d4be3b 100644 --- a/src/cmd/internal/obj/arm/a.out.go +++ b/src/cmd/internal/obj/arm/a.out.go @@ -121,10 +121,11 @@ const ( C_PSR C_FCR - C_RCON /* 0xff rotated */ - C_NCON /* ~RCON */ - C_RCON2 /* OR of two disjoint C_RCON constants */ - C_SCON /* 0xffff */ + C_RCON /* 0xff rotated */ + C_NCON /* ~RCON */ + C_RCON2A /* OR of two disjoint C_RCON constants */ + C_RCON2S /* subtraction of two disjoint C_RCON constants */ + C_SCON /* 0xffff */ C_LCON C_LCONADDR C_ZFCON diff --git a/src/cmd/internal/obj/arm/anames5.go b/src/cmd/internal/obj/arm/anames5.go index 05892def04..bb98d3b081 100644 --- a/src/cmd/internal/obj/arm/anames5.go +++ b/src/cmd/internal/obj/arm/anames5.go @@ -16,7 +16,8 @@ var cnames5 = []string{ "FCR", "RCON", "NCON", - "RCON2", + "RCON2A", + "RCON2S", "SCON", "LCON", "LCONADDR", diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go index 28bd7f8020..f3122f725e 100644 --- a/src/cmd/internal/obj/arm/asm5.go +++ b/src/cmd/internal/obj/arm/asm5.go @@ -88,6 +88,8 @@ var optab = []Optab{ {AADD, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, {AAND, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0}, {AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, + {AORR, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0}, + {AORR, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, {AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, {AMVN, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, {ACMP, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0}, @@ -95,6 +97,8 @@ var optab = []Optab{ {AADD, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0}, {AAND, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0}, {AAND, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0}, + {AORR, C_RCON, C_REG, C_REG, 2, 4, 0, 0, 0}, + {AORR, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0}, {AMOVW, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0}, {AMVN, C_RCON, C_NONE, C_REG, 2, 4, 0, 0, 0}, {ACMP, C_RCON, C_REG, C_NONE, 2, 4, 0, 0, 0}, @@ -102,6 +106,8 @@ var optab = []Optab{ {AADD, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0}, {AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0}, {AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0}, + {AORR, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0}, + {AORR, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0}, {AMVN, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0}, {ACMP, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0}, {AMOVW, C_RACON, C_NONE, C_REG, 4, 4, REGSP, 0, 0}, @@ -136,20 +142,27 @@ var optab = []Optab{ {AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0}, {AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, + {AORR, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0}, + {AORR, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0}, {AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0}, {AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {AAND, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0}, {AAND, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, + {AORR, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0}, + {AORR, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0}, - {AADD, C_RCON2, C_REG, C_REG, 106, 8, 0, 0, 0}, - // TODO: RCON2: how to do AND and BIC? + {AADD, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0}, + {AORR, C_RCON2A, C_REG, C_REG, 106, 8, 0, 0, 0}, + {AADD, C_RCON2S, C_REG, C_REG, 107, 8, 0, 0, 0}, {AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0}, {AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0}, {AAND, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0}, {AAND, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0}, + {AORR, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0}, + {AORR, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0}, {AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0}, {ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM, 0}, {AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, @@ -970,10 +983,10 @@ func immrot(v uint32) int32 { return 0 } -// immrot2 returns bits encoding the immediate constant fields of two instructions, +// immrot2a returns bits encoding the immediate constant fields of two instructions, // such that the encoded constants x, y satisfy x|y==v, x&y==0. // Returns 0,0 if no such decomposition of v exists. -func immrot2(v uint32) (uint32, uint32) { +func immrot2a(v uint32) (uint32, uint32) { for i := uint(1); i < 32; i++ { m := uint32(1<= 0 && v <= 0xfff { return v&0xfff | 1<<24 | 1<<23 /* pre indexing */ /* pre indexing, up */ @@ -1159,8 +1198,11 @@ func (c *ctxt5) aclass(a *obj.Addr) int { if uint32(c.instoffset) <= 0xffff && objabi.GOARM == 7 { return C_SCON } - if x, y := immrot2(uint32(c.instoffset)); x != 0 && y != 0 { - return C_RCON2 + if x, y := immrot2a(uint32(c.instoffset)); x != 0 && y != 0 { + return C_RCON2A + } + if y, x := immrot2s(uint32(c.instoffset)); x != 0 && y != 0 { + return C_RCON2S } return C_LCON @@ -1226,13 +1268,12 @@ func (c *ctxt5) oplook(p *obj.Prog) *Optab { a2 = C_REG } - // If Scond != 0, we must use the constant pool instead of - // splitting the instruction in two. The most common reason is - // .S (flag updating) instructions. There may be others. - if a1 == C_RCON2 && p.Scond != 0 { + // If current instruction has a .S suffix (flags update), + // we must use the constant pool instead of splitting it. + if (a1 == C_RCON2A || a1 == C_RCON2S) && p.Scond&C_SBIT != 0 { a1 = C_LCON } - if a3 == C_RCON2 && p.Scond != 0 { + if (a3 == C_RCON2A || a3 == C_RCON2S) && p.Scond&C_SBIT != 0 { a3 = C_LCON } @@ -1266,7 +1307,7 @@ func cmp(a int, b int) bool { } switch a { case C_LCON: - if b == C_RCON || b == C_NCON || b == C_SCON || b == C_RCON2 { + if b == C_RCON || b == C_NCON || b == C_SCON || b == C_RCON2A || b == C_RCON2S { return true } @@ -1406,16 +1447,14 @@ func buildop(ctxt *obj.Link) { log.Fatalf("bad code") case AADD: - opset(AEOR, r0) opset(ASUB, r0) opset(ARSB, r0) opset(AADC, r0) opset(ASBC, r0) opset(ARSC, r0) - opset(AORR, r0) - case AAND: - opset(AAND, r0) + case AORR: + opset(AEOR, r0) opset(ABIC, r0) case ACMP: @@ -1541,6 +1580,7 @@ func buildop(ctxt *obj.Link) { ALDREXD, ASTREXD, APLD, + AAND, obj.AUNDEF, obj.AFUNCDATA, obj.APCDATA, @@ -1609,11 +1649,11 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) { c.aclass(&p.From) r := int(p.Reg) rt := int(p.To.Reg) - x, y := immrot2(uint32(c.instoffset)) + x, y := immrot2a(uint32(c.instoffset)) var as2 obj.As switch p.As { - case AADD, ASUB, AORR, AEOR: - as2 = p.As // ADD, SUB, ORR, EOR + case AADD, ASUB, AORR, AEOR, ABIC: + as2 = p.As // ADD, SUB, ORR, EOR, BIC case ARSB: as2 = AADD // RSB -> RSB/ADD pair case AADC: @@ -1632,6 +1672,35 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= x o2 |= y + case 107: /* op $I,R,R where I can be decomposed into 2 immediates */ + c.aclass(&p.From) + r := int(p.Reg) + rt := int(p.To.Reg) + y, x := immrot2s(uint32(c.instoffset)) + var as2 obj.As + switch p.As { + case AADD: + as2 = ASUB // ADD -> ADD/SUB pair + case ASUB: + as2 = AADD // SUB -> SUB/ADD pair + case ARSB: + as2 = ASUB // RSB -> RSB/SUB pair + case AADC: + as2 = ASUB // ADC -> ADC/SUB pair + case ASBC: + as2 = AADD // SBC -> SBC/ADD pair + case ARSC: + as2 = ASUB // RSC -> RSC/SUB pair + default: + c.ctxt.Diag("unknown second op for %v", p) + } + o1 = c.oprrr(p, p.As, int(p.Scond)) + o2 = c.oprrr(p, as2, int(p.Scond)) + o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12 + o2 |= (uint32(rt)&15)<<16 | (uint32(rt)&15)<<12 + o1 |= y + o2 |= x + case 3: /* add R<<[IR],[R],R */ o1 = c.mov(p) diff --git a/test/armimm.go b/test/armimm.go index f3fb516ed4..65124ad47a 100644 --- a/test/armimm.go +++ b/test/armimm.go @@ -11,57 +11,99 @@ package main import "fmt" -const c32 = 0xaa00dd -const c64 = 0xaa00dd55000066 +const c32a = 0x00aa00dd +const c32s = 0x00ffff00 +const c64a = 0x00aa00dd55000066 +const c64s = 0x00ffff00004fff00 //go:noinline -func add32(x uint32) uint32 { - return x + c32 +func add32a(x uint32) uint32 { + return x + c32a } //go:noinline -func sub32(x uint32) uint32 { - return x - c32 +func add32s(x uint32) uint32 { + return x + c32s +} + +//go:noinline +func sub32a(x uint32) uint32 { + return x - c32a +} + +//go:noinline +func sub32s(x uint32) uint32 { + return x - c32s } //go:noinline func or32(x uint32) uint32 { - return x | c32 + return x | c32a } //go:noinline func xor32(x uint32) uint32 { - return x ^ c32 + return x ^ c32a } //go:noinline -func subr32(x uint32) uint32 { - return c32 - x +func subr32a(x uint32) uint32 { + return c32a - x } //go:noinline -func add64(x uint64) uint64 { - return x + c64 +func subr32s(x uint32) uint32 { + return c32s - x } //go:noinline -func sub64(x uint64) uint64 { - return x - c64 +func bic32(x uint32) uint32 { + return x &^ c32a +} + +//go:noinline +func add64a(x uint64) uint64 { + return x + c64a +} + +//go:noinline +func add64s(x uint64) uint64 { + return x + c64s +} + +//go:noinline +func sub64a(x uint64) uint64 { + return x - c64a +} + +//go:noinline +func sub64s(x uint64) uint64 { + return x - c64s } //go:noinline func or64(x uint64) uint64 { - return x | c64 + return x | c64a } //go:noinline func xor64(x uint64) uint64 { - return x ^ c64 + return x ^ c64a } //go:noinline -func subr64(x uint64) uint64 { - return c64 - x +func subr64a(x uint64) uint64 { + return c64a - x +} + +//go:noinline +func subr64s(x uint64) uint64 { + return c64s - x +} + +//go:noinline +func bic64(x uint64) uint64 { + return x &^ c64a } // Note: x-c gets rewritten to x+(-c), so SUB and SBC are not directly testable. @@ -75,39 +117,63 @@ func main() { func test32() { var a uint32 = 0x11111111 var want, got uint32 - if want, got = a+c32, add32(a); got != want { - panic(fmt.Sprintf("add32(%x) = %x, want %x", a, got, want)) + if want, got = a+c32a, add32a(a); got != want { + panic(fmt.Sprintf("add32a(%x) = %x, want %x", a, got, want)) } - if want, got = a-c32, sub32(a); got != want { - panic(fmt.Sprintf("sub32(%x) = %x, want %x", a, got, want)) + if want, got = a+c32s, add32s(a); got != want { + panic(fmt.Sprintf("add32s(%x) = %x, want %x", a, got, want)) } - if want, got = a|c32, or32(a); got != want { + if want, got = a-c32a, sub32a(a); got != want { + panic(fmt.Sprintf("sub32a(%x) = %x, want %x", a, got, want)) + } + if want, got = a-c32s, sub32s(a); got != want { + panic(fmt.Sprintf("sub32s(%x) = %x, want %x", a, got, want)) + } + if want, got = a|c32a, or32(a); got != want { panic(fmt.Sprintf("or32(%x) = %x, want %x", a, got, want)) } - if want, got = a^c32, xor32(a); got != want { + if want, got = a^c32a, xor32(a); got != want { panic(fmt.Sprintf("xor32(%x) = %x, want %x", a, got, want)) } - if want, got = c32-a, subr32(a); got != want { - panic(fmt.Sprintf("subr32(%x) = %x, want %x", a, got, want)) + if want, got = c32a-a, subr32a(a); got != want { + panic(fmt.Sprintf("subr32a(%x) = %x, want %x", a, got, want)) + } + if want, got = c32s-a, subr32s(a); got != want { + panic(fmt.Sprintf("subr32s(%x) = %x, want %x", a, got, want)) + } + if want, got = a&^c32a, bic32(a); got != want { + panic(fmt.Sprintf("bic32(%x) = %x, want %x", a, got, want)) } } func test64() { var a uint64 = 0x1111111111111111 var want, got uint64 - if want, got = a+c64, add64(a); got != want { - panic(fmt.Sprintf("add64(%x) = %x, want %x", a, got, want)) + if want, got = a+c64a, add64a(a); got != want { + panic(fmt.Sprintf("add64a(%x) = %x, want %x", a, got, want)) } - if want, got = a-c64, sub64(a); got != want { - panic(fmt.Sprintf("sub64(%x) = %x, want %x", a, got, want)) + if want, got = a+c64s, add64s(a); got != want { + panic(fmt.Sprintf("add64s(%x) = %x, want %x", a, got, want)) } - if want, got = a|c64, or64(a); got != want { + if want, got = a-c64a, sub64a(a); got != want { + panic(fmt.Sprintf("sub64a(%x) = %x, want %x", a, got, want)) + } + if want, got = a-c64s, sub64s(a); got != want { + panic(fmt.Sprintf("sub64s(%x) = %x, want %x", a, got, want)) + } + if want, got = a|c64a, or64(a); got != want { panic(fmt.Sprintf("or64(%x) = %x, want %x", a, got, want)) } - if want, got = a^c64, xor64(a); got != want { + if want, got = a^c64a, xor64(a); got != want { panic(fmt.Sprintf("xor64(%x) = %x, want %x", a, got, want)) } - if want, got = c64-a, subr64(a); got != want { - panic(fmt.Sprintf("subr64(%x) = %x, want %x", a, got, want)) + if want, got = c64a-a, subr64a(a); got != want { + panic(fmt.Sprintf("subr64a(%x) = %x, want %x", a, got, want)) + } + if want, got = c64s-a, subr64s(a); got != want { + panic(fmt.Sprintf("subr64s(%x) = %x, want %x", a, got, want)) + } + if want, got = a&^c64a, bic64(a); got != want { + panic(fmt.Sprintf("bic64(%x) = %x, want %x", a, got, want)) } }