diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go
index 4392081f6e..952a2050a0 100644
--- a/src/cmd/compile/internal/riscv64/ssa.go
+++ b/src/cmd/compile/internal/riscv64/ssa.go
@@ -419,7 +419,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
 		ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
 		ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
-		ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW:
+		ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW:
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = v.Args[0].Reg()
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
index 016eb53f04..96b9b11cf9 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
@@ -225,6 +225,12 @@
 (Ctz16 x) => (CTZW (ORI [1<<16] x))
 (Ctz8 x) => (CTZW (ORI [1<<8] x))
 
+// Bit length (note that these will only be emitted for rva22u64 and above).
+(BitLen64 x) => (SUB (MOVDconst [64]) (CLZ x))
+(BitLen32 x) => (SUB (MOVDconst [32]) (CLZW x))
+(BitLen16 x) => (BitLen64 (ZeroExt16to64 x))
+(BitLen8 x) => (BitLen64 (ZeroExt8to64 x))
+
 (Less64 ...) => (SLT ...)
 (Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y))
 (Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y))
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
index 85e9e47e82..cc2302ff37 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
@@ -229,6 +229,8 @@ func init() {
 		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
 		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                  // ^arg0 & arg1
 		{name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"},    // arg0 & auxint
+		{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"},                    // count leading zeros
+		{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},                  // count leading zeros of least significant word
 		{name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"},                    // count trailing zeros
 		{name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"},                  // count trailing zeros of least significant word
 		{name: "NOT", argLength: 1, reg: gp11, asm: "NOT"},                    // ^arg0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 3fd5b310ac..0ae88ab246 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2523,6 +2523,8 @@ const (
 	OpRISCV64AND
 	OpRISCV64ANDN
 	OpRISCV64ANDI
+	OpRISCV64CLZ
+	OpRISCV64CLZW
 	OpRISCV64CTZ
 	OpRISCV64CTZW
 	OpRISCV64NOT
@@ -34004,6 +34006,32 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "CLZ",
+		argLen: 1,
+		asm:    riscv.ACLZ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+			},
+			outputs: []outputInfo{
+				{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+			},
+		},
+	},
+	{
+		name:   "CLZW",
+		argLen: 1,
+		asm:    riscv.ACLZW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+			},
+			outputs: []outputInfo{
+				{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+			},
+		},
+	},
 	{
 		name:   "CTZ",
 		argLen: 1,
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index ab93309680..b2318e711b 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -102,6 +102,14 @@ func rewriteValueRISCV64(v *Value) bool {
 		return true
 	case OpAvg64u:
 		return rewriteValueRISCV64_OpAvg64u(v)
+	case OpBitLen16:
+		return rewriteValueRISCV64_OpBitLen16(v)
+	case OpBitLen32:
+		return rewriteValueRISCV64_OpBitLen32(v)
+	case OpBitLen64:
+		return rewriteValueRISCV64_OpBitLen64(v)
+	case OpBitLen8:
+		return rewriteValueRISCV64_OpBitLen8(v)
 	case OpClosureCall:
 		v.Op = OpRISCV64CALLclosure
 		return true
@@ -928,6 +936,72 @@ func rewriteValueRISCV64_OpAvg64u(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueRISCV64_OpBitLen16(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (BitLen16 x)
+	// result: (BitLen64 (ZeroExt16to64 x))
+	for {
+		x := v_0
+		v.reset(OpBitLen64)
+		v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueRISCV64_OpBitLen32(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (BitLen32 x)
+	// result: (SUB (MOVDconst [32]) (CLZW x))
+	for {
+		t := v.Type
+		x := v_0
+		v.reset(OpRISCV64SUB)
+		v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(32)
+		v1 := b.NewValue0(v.Pos, OpRISCV64CLZW, t)
+		v1.AddArg(x)
+		v.AddArg2(v0, v1)
+		return true
+	}
+}
+func rewriteValueRISCV64_OpBitLen64(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (BitLen64 x)
+	// result: (SUB (MOVDconst [64]) (CLZ x))
+	for {
+		t := v.Type
+		x := v_0
+		v.reset(OpRISCV64SUB)
+		v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(64)
+		v1 := b.NewValue0(v.Pos, OpRISCV64CLZ, t)
+		v1.AddArg(x)
+		v.AddArg2(v0, v1)
+		return true
+	}
+}
+func rewriteValueRISCV64_OpBitLen8(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (BitLen8 x)
+	// result: (BitLen64 (ZeroExt8to64 x))
+	for {
+		x := v_0
+		v.reset(OpBitLen64)
+		v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueRISCV64_OpConst16(v *Value) bool {
 	// match: (Const16 [val])
 	// result: (MOVDconst [int64(val)])
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index f2b13045eb..eaced0b277 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -962,6 +962,30 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
 			return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
 		},
 		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
+
+	if cfg.goriscv64 >= 22 {
+		addF("math/bits", "Len64",
+			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+				return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
+			},
+			sys.RISCV64)
+		addF("math/bits", "Len32",
+			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
+			},
+			sys.RISCV64)
+		addF("math/bits", "Len16",
+			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+				return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
+			},
+			sys.RISCV64)
+		addF("math/bits", "Len8",
+			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+				return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
+			},
+			sys.RISCV64)
+	}
+
 	alias("math/bits", "Len", "math/bits", "Len64", p8...)
 	alias("math/bits", "Len", "math/bits", "Len32", p4...)
diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go
index a06fdeedb2..230a7bdf67 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go
@@ -1110,6 +1110,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
 	{"riscv64", "internal/runtime/sys", "GetCallerPC"}:     struct{}{},
 	{"riscv64", "internal/runtime/sys", "GetCallerSP"}:     struct{}{},
 	{"riscv64", "internal/runtime/sys", "GetClosurePtr"}:   struct{}{},
+	{"riscv64", "internal/runtime/sys", "Len64"}:           struct{}{},
+	{"riscv64", "internal/runtime/sys", "Len8"}:            struct{}{},
 	{"riscv64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{},
 	{"riscv64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{},
 	{"riscv64", "internal/runtime/sys", "TrailingZeros8"}:  struct{}{},
@@ -1120,6 +1122,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
 	{"riscv64", "math/big", "mulWW"}:                       struct{}{},
 	{"riscv64", "math/bits", "Add"}:                        struct{}{},
 	{"riscv64", "math/bits", "Add64"}:                      struct{}{},
+	{"riscv64", "math/bits", "Len"}:                        struct{}{},
+	{"riscv64", "math/bits", "Len16"}:                      struct{}{},
+	{"riscv64", "math/bits", "Len32"}:                      struct{}{},
+	{"riscv64", "math/bits", "Len64"}:                      struct{}{},
+	{"riscv64", "math/bits", "Len8"}:                       struct{}{},
 	{"riscv64", "math/bits", "Mul"}:                        struct{}{},
 	{"riscv64", "math/bits", "Mul64"}:                      struct{}{},
 	{"riscv64", "math/bits", "RotateLeft"}:                 struct{}{},
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 786fad3bd9..a9cf466780 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -15,60 +15,70 @@ import "math/bits"
 func LeadingZeros(n uint) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3:"LZCNTQ", -"BSRQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV",-"SUB"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"SUB"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros(n)
 }
 
 func LeadingZeros64(n uint64) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3:"LZCNTQ", -"BSRQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm:"CLZ"
+	// arm64:"CLZ"
 	// loong64:"CLZV",-"SUB"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros64(n)
 }
 
 func LeadingZeros32(n uint32) int {
 	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL",- "BSRL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZW"
+	// arm:"CLZ"
+	// arm64:"CLZW"
 	// loong64:"CLZW",-"SUB"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZW"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZW",-"ADDI"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros32(n)
 }
 
 func LeadingZeros16(n uint16) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL",- "BSRL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-48",-"NEG"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros16(n)
 }
 
 func LeadingZeros8(n uint8) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL",- "BSRL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-56",-"NEG"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros8(n)
 }
 
@@ -79,30 +89,35 @@ func LeadingZeros8(n uint8) int {
 func Len(n uint) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3: "LZCNTQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len(n)
 }
 
 func Len64(n uint64) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3: "LZCNTQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len64(n)
 }
 
 func SubFromLen64(n uint64) int {
 	// loong64:"CLZV",-"ADD"
 	// ppc64x:"CNTLZD",-"SUBC"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI",-"NEG"
 	return 64 - bits.Len64(n)
 }
 
@@ -114,36 +129,42 @@ func CompareWithLen64(n uint64) bool {
 func Len32(n uint32) int {
 	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZW"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x: "CNTLZW"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZW","ADDI\t\\$-32"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len32(n)
 }
 
 func Len16(n uint16) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len16(n)
 }
 
 func Len8(n uint8) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len8(n)
 }
 
@@ -451,6 +472,7 @@ func IterateBits64(n uint64) int {
 	for n != 0 {
 		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
 		// amd64/v3:"TZCNTQ"
+		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
 		i += bits.TrailingZeros64(n)
 		n &= n - 1
 	}
@@ -462,6 +484,7 @@ func IterateBits32(n uint32) int {
 	for n != 0 {
 		// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
 		// amd64/v3:"TZCNTL"
+		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
 		i += bits.TrailingZeros32(n)
 		n &= n - 1
 	}