cmd/compile,internal/cpu,runtime: intrinsify math/bits.OnesCount on riscv64

For riscv64/rva22u64 and above, we can intrinsify math/bits.OnesCount
using the CPOP/CPOPW machine instructions. Since the native Go
implementation of OnesCount is relatively expensive, it is also
worth emitting a check for Zbb support when compiled for rva20u64.

On a Banana Pi F3, with GORISCV64=rva22u64:

              │     oc.1     │                oc.2                 │
              │    sec/op    │   sec/op     vs base                │
OnesCount-8     16.930n ± 0%   4.389n ± 0%  -74.08% (p=0.000 n=10)
OnesCount8-8     5.642n ± 0%   5.016n ± 0%  -11.10% (p=0.000 n=10)
OnesCount16-8    9.404n ± 0%   5.015n ± 0%  -46.67% (p=0.000 n=10)
OnesCount32-8   13.165n ± 0%   4.388n ± 0%  -66.67% (p=0.000 n=10)
OnesCount64-8   16.300n ± 0%   4.388n ± 0%  -73.08% (p=0.000 n=10)
geomean          11.40n        4.629n       -59.40%

On a Banana Pi F3, compiled with GORISCV64=rva20u64 and with Zbb
detection enabled:

              │     oc.3     │                oc.4                 │
              │    sec/op    │   sec/op     vs base                │
OnesCount-8     16.930n ± 0%   5.643n ± 0%  -66.67% (p=0.000 n=10)
OnesCount8-8     5.642n ± 0%   5.642n ± 0%        ~ (p=0.447 n=10)
OnesCount16-8   10.030n ± 0%   6.896n ± 0%  -31.25% (p=0.000 n=10)
OnesCount32-8   13.170n ± 0%   5.642n ± 0%  -57.16% (p=0.000 n=10)
OnesCount64-8   16.300n ± 0%   5.642n ± 0%  -65.39% (p=0.000 n=10)
geomean          11.55n        5.873n       -49.16%

On a Banana Pi F3, compiled with GORISCV64=rva20u64 but with Zbb
detection disabled:

              │    oc.3     │                oc.5                 │
              │   sec/op    │   sec/op     vs base                │
OnesCount-8     16.93n ± 0%   29.47n ± 0%  +74.07% (p=0.000 n=10)
OnesCount8-8    5.642n ± 0%   5.643n ± 0%        ~ (p=0.191 n=10)
OnesCount16-8   10.03n ± 0%   15.05n ± 0%  +50.05% (p=0.000 n=10)
OnesCount32-8   13.17n ± 0%   18.18n ± 0%  +38.04% (p=0.000 n=10)
OnesCount64-8   16.30n ± 0%   21.94n ± 0%  +34.60% (p=0.000 n=10)
geomean         11.55n        15.84n       +37.16%

For hardware without Zbb, this adds ~5ns overhead, while for hardware
with Zbb we achieve a performance gain of up to 11ns. It is worth
noting that OnesCount8 is cheap enough that it is preferable to stick
with the generic version in this case.

Change-Id: Id657e40e0dd1b1ab8cc0fe0f8a68df4c9f2d7da5
Reviewed-on: https://go-review.googlesource.com/c/go/+/660856
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Joel Sing 2025-03-19 23:57:23 +11:00
parent 90e8b8cdae
commit 4d10d4ad84
18 changed files with 157 additions and 6 deletions

View File

@ -64,6 +64,7 @@ type symsStruct struct {
Loong64HasLAMCAS *obj.LSym Loong64HasLAMCAS *obj.LSym
Loong64HasLAM_BH *obj.LSym Loong64HasLAM_BH *obj.LSym
Loong64HasLSX *obj.LSym Loong64HasLSX *obj.LSym
RISCV64HasZbb *obj.LSym
X86HasFMA *obj.LSym X86HasFMA *obj.LSym
X86HasPOPCNT *obj.LSym X86HasPOPCNT *obj.LSym
X86HasSSE41 *obj.LSym X86HasSSE41 *obj.LSym

View File

@ -420,7 +420,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW, ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW,
ssa.OpRISCV64REV8: ssa.OpRISCV64REV8, ssa.OpRISCV64CPOP, ssa.OpRISCV64CPOPW:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg() p.From.Reg = v.Args[0].Reg()

View File

@ -236,6 +236,12 @@
(Bswap32 <t> x) => (SRLI [32] (REV8 <t> x)) (Bswap32 <t> x) => (SRLI [32] (REV8 <t> x))
(Bswap16 <t> x) => (SRLI [48] (REV8 <t> x)) (Bswap16 <t> x) => (SRLI [48] (REV8 <t> x))
// Population count (note that these will be emitted with guards for rva20u64).
(PopCount64 ...) => (CPOP ...)
(PopCount32 ...) => (CPOPW ...)
(PopCount16 x) => (CPOP (ZeroExt16to64 x))
(PopCount8 x) => (CPOP (ZeroExt8to64 x))
(Less64 ...) => (SLT ...) (Less64 ...) => (SLT ...)
(Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y)) (Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y))
(Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y)) (Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y))

View File

@ -231,6 +231,8 @@ func init() {
{name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint {name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zeros {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zeros
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zeros of least significant word {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zeros of least significant word
{name: "CPOP", argLength: 1, reg: gp11, asm: "CPOP"}, // count set bits
{name: "CPOPW", argLength: 1, reg: gp11, asm: "CPOPW"}, // count set bits in least significant word
{name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"}, // count trailing zeros {name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"}, // count trailing zeros
{name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"}, // count trailing zeros of least significant word {name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"}, // count trailing zeros of least significant word
{name: "NOT", argLength: 1, reg: gp11, asm: "NOT"}, // ^arg0 {name: "NOT", argLength: 1, reg: gp11, asm: "NOT"}, // ^arg0

View File

@ -2514,6 +2514,8 @@ const (
OpRISCV64ANDI OpRISCV64ANDI
OpRISCV64CLZ OpRISCV64CLZ
OpRISCV64CLZW OpRISCV64CLZW
OpRISCV64CPOP
OpRISCV64CPOPW
OpRISCV64CTZ OpRISCV64CTZ
OpRISCV64CTZW OpRISCV64CTZW
OpRISCV64NOT OpRISCV64NOT
@ -33887,6 +33889,32 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "CPOP",
argLen: 1,
asm: riscv.ACPOP,
reg: regInfo{
inputs: []inputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{
name: "CPOPW",
argLen: 1,
asm: riscv.ACPOPW,
reg: regInfo{
inputs: []inputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{ {
name: "CTZ", name: "CTZ",
argLen: 1, argLen: 1,

View File

@ -487,6 +487,16 @@ func rewriteValueRISCV64(v *Value) bool {
return true return true
case OpPanicBounds: case OpPanicBounds:
return rewriteValueRISCV64_OpPanicBounds(v) return rewriteValueRISCV64_OpPanicBounds(v)
case OpPopCount16:
return rewriteValueRISCV64_OpPopCount16(v)
case OpPopCount32:
v.Op = OpRISCV64CPOPW
return true
case OpPopCount64:
v.Op = OpRISCV64CPOP
return true
case OpPopCount8:
return rewriteValueRISCV64_OpPopCount8(v)
case OpPubBarrier: case OpPubBarrier:
v.Op = OpRISCV64LoweredPubBarrier v.Op = OpRISCV64LoweredPubBarrier
return true return true
@ -3458,6 +3468,36 @@ func rewriteValueRISCV64_OpPanicBounds(v *Value) bool {
} }
return false return false
} }
// rewriteValueRISCV64_OpPopCount16 lowers a PopCount16 value to a CPOP whose
// single argument is the original operand widened with ZeroExt16to64.
//
//	match:  (PopCount16 x)
//	result: (CPOP (ZeroExt16to64 x))
//
// The rule is unconditional, so the function always reports true.
func rewriteValueRISCV64_OpPopCount16(v *Value) bool {
	// Capture the operand and block before v is reset below.
	operand := v.Args[0]
	blk := v.Block
	uint64Type := blk.Func.Config.Types.UInt64

	v.reset(OpRISCV64CPOP)
	widened := blk.NewValue0(v.Pos, OpZeroExt16to64, uint64Type)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
// rewriteValueRISCV64_OpPopCount8 lowers a PopCount8 value to a CPOP whose
// single argument is the original operand widened with ZeroExt8to64.
//
//	match:  (PopCount8 x)
//	result: (CPOP (ZeroExt8to64 x))
//
// The rule is unconditional, so the function always reports true.
func rewriteValueRISCV64_OpPopCount8(v *Value) bool {
	// Capture the operand and block before v is reset below.
	operand := v.Args[0]
	blk := v.Block
	uint64Type := blk.Func.Config.Types.UInt64

	v.reset(OpRISCV64CPOP)
	widened := blk.NewValue0(v.Pos, OpZeroExt8to64, uint64Type)
	widened.AddArg(operand)
	v.AddArg(widened)
	return true
}
func rewriteValueRISCV64_OpRISCV64ADD(v *Value) bool { func rewriteValueRISCV64_OpRISCV64ADD(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]

View File

@ -1129,12 +1129,49 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
} }
} }
// makeOnesCountRISCV64 returns an intrinsic builder for the riscv64
// math/bits.OnesCount* functions. op is the SSA population-count op to emit
// (the callers in this function pass ssa.OpPopCount8/16/32/64).
//
// When cfg.goriscv64 >= 22 the op is emitted unconditionally. Otherwise the
// builder emits a runtime branch on the riscv64HasZbb flag: the true arm uses
// op, the false arm performs a normal call to the Go implementation, and the
// two results are merged through s.vars[n].
makeOnesCountRISCV64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
// Targets at rva22u64 or above: no runtime feature check is generated.
if cfg.goriscv64 >= 22 {
return s.newValue1(op, types.Types[types.TINT], args[0])
}
// Load the bool feature flag and end the current block with a
// conditional branch on it.
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.RISCV64HasZbb, s.sb)
v := s.load(types.Types[types.TBOOL], addr)
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(v)
bTrue := s.f.NewBlock(ssa.BlockPlain)
bFalse := s.f.NewBlock(ssa.BlockPlain)
bEnd := s.f.NewBlock(ssa.BlockPlain)
// The first edge added is the successor taken when the flag is true.
b.AddEdgeTo(bTrue)
b.AddEdgeTo(bFalse)
b.Likely = ssa.BranchLikely // The majority of RISC-V hardware supports Zbb.
// We have the intrinsic - use it directly.
s.startBlock(bTrue)
s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
s.endBlock().AddEdgeTo(bEnd)
// Call the pure Go version.
s.startBlock(bFalse)
s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT]
s.endBlock().AddEdgeTo(bEnd)
// Merge results.
s.startBlock(bEnd)
return s.variable(n, types.Types[types.TINT])
}
}
addF("math/bits", "OnesCount64", addF("math/bits", "OnesCount64",
makeOnesCountAMD64(ssa.OpPopCount64), makeOnesCountAMD64(ssa.OpPopCount64),
sys.AMD64) sys.AMD64)
addF("math/bits", "OnesCount64", addF("math/bits", "OnesCount64",
makeOnesCountLoong64(ssa.OpPopCount64), makeOnesCountLoong64(ssa.OpPopCount64),
sys.Loong64) sys.Loong64)
addF("math/bits", "OnesCount64",
makeOnesCountRISCV64(ssa.OpPopCount64),
sys.RISCV64)
addF("math/bits", "OnesCount64", addF("math/bits", "OnesCount64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0]) return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
@ -1146,6 +1183,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
addF("math/bits", "OnesCount32", addF("math/bits", "OnesCount32",
makeOnesCountLoong64(ssa.OpPopCount32), makeOnesCountLoong64(ssa.OpPopCount32),
sys.Loong64) sys.Loong64)
addF("math/bits", "OnesCount32",
makeOnesCountRISCV64(ssa.OpPopCount32),
sys.RISCV64)
addF("math/bits", "OnesCount32", addF("math/bits", "OnesCount32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0]) return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
@ -1157,6 +1197,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
addF("math/bits", "OnesCount16", addF("math/bits", "OnesCount16",
makeOnesCountLoong64(ssa.OpPopCount16), makeOnesCountLoong64(ssa.OpPopCount16),
sys.Loong64) sys.Loong64)
addF("math/bits", "OnesCount16",
makeOnesCountRISCV64(ssa.OpPopCount16),
sys.RISCV64)
addF("math/bits", "OnesCount16", addF("math/bits", "OnesCount16",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0]) return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
@ -1167,6 +1210,13 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0]) return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0])
}, },
sys.S390X, sys.PPC64, sys.Wasm) sys.S390X, sys.PPC64, sys.Wasm)
if cfg.goriscv64 >= 22 {
addF("math/bits", "OnesCount8",
makeOnesCountRISCV64(ssa.OpPopCount8),
sys.RISCV64)
}
alias("math/bits", "OnesCount", "math/bits", "OnesCount64", p8...) alias("math/bits", "OnesCount", "math/bits", "OnesCount64", p8...)
addF("math/bits", "Mul64", addF("math/bits", "Mul64",

View File

@ -1114,6 +1114,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"riscv64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{}, {"riscv64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{},
{"riscv64", "internal/runtime/sys", "Len64"}: struct{}{}, {"riscv64", "internal/runtime/sys", "Len64"}: struct{}{},
{"riscv64", "internal/runtime/sys", "Len8"}: struct{}{}, {"riscv64", "internal/runtime/sys", "Len8"}: struct{}{},
{"riscv64", "internal/runtime/sys", "OnesCount64"}: struct{}{},
{"riscv64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{}, {"riscv64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{},
{"riscv64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, {"riscv64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{},
{"riscv64", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, {"riscv64", "internal/runtime/sys", "TrailingZeros8"}: struct{}{},
@ -1131,6 +1132,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"riscv64", "math/bits", "Len8"}: struct{}{}, {"riscv64", "math/bits", "Len8"}: struct{}{},
{"riscv64", "math/bits", "Mul"}: struct{}{}, {"riscv64", "math/bits", "Mul"}: struct{}{},
{"riscv64", "math/bits", "Mul64"}: struct{}{}, {"riscv64", "math/bits", "Mul64"}: struct{}{},
{"riscv64", "math/bits", "OnesCount"}: struct{}{},
{"riscv64", "math/bits", "OnesCount16"}: struct{}{},
{"riscv64", "math/bits", "OnesCount32"}: struct{}{},
{"riscv64", "math/bits", "OnesCount64"}: struct{}{},
{"riscv64", "math/bits", "OnesCount8"}: struct{}{},
{"riscv64", "math/bits", "ReverseBytes16"}: struct{}{}, {"riscv64", "math/bits", "ReverseBytes16"}: struct{}{},
{"riscv64", "math/bits", "ReverseBytes32"}: struct{}{}, {"riscv64", "math/bits", "ReverseBytes32"}: struct{}{},
{"riscv64", "math/bits", "ReverseBytes64"}: struct{}{}, {"riscv64", "math/bits", "ReverseBytes64"}: struct{}{},

View File

@ -160,6 +160,7 @@ func InitConfig() {
ir.Syms.Loong64HasLAMCAS = typecheck.LookupRuntimeVar("loong64HasLAMCAS") // bool ir.Syms.Loong64HasLAMCAS = typecheck.LookupRuntimeVar("loong64HasLAMCAS") // bool
ir.Syms.Loong64HasLAM_BH = typecheck.LookupRuntimeVar("loong64HasLAM_BH") // bool ir.Syms.Loong64HasLAM_BH = typecheck.LookupRuntimeVar("loong64HasLAM_BH") // bool
ir.Syms.Loong64HasLSX = typecheck.LookupRuntimeVar("loong64HasLSX") // bool ir.Syms.Loong64HasLSX = typecheck.LookupRuntimeVar("loong64HasLSX") // bool
ir.Syms.RISCV64HasZbb = typecheck.LookupRuntimeVar("riscv64HasZbb") // bool
ir.Syms.Staticuint64s = typecheck.LookupRuntimeVar("staticuint64s") ir.Syms.Staticuint64s = typecheck.LookupRuntimeVar("staticuint64s")
ir.Syms.Typedmemmove = typecheck.LookupRuntimeFunc("typedmemmove") ir.Syms.Typedmemmove = typecheck.LookupRuntimeFunc("typedmemmove")
ir.Syms.Udiv = typecheck.LookupRuntimeVar("udiv") // asm func with special ABI ir.Syms.Udiv = typecheck.LookupRuntimeVar("udiv") // asm func with special ABI

View File

@ -294,5 +294,6 @@ var arm64HasATOMICS bool
var loong64HasLAMCAS bool var loong64HasLAMCAS bool
var loong64HasLAM_BH bool var loong64HasLAM_BH bool
var loong64HasLSX bool var loong64HasLSX bool
var riscv64HasZbb bool
func asanregisterglobals(unsafe.Pointer, uintptr) func asanregisterglobals(unsafe.Pointer, uintptr)

View File

@ -242,6 +242,7 @@ var runtimeDecls = [...]struct {
{"loong64HasLAMCAS", varTag, 6}, {"loong64HasLAMCAS", varTag, 6},
{"loong64HasLAM_BH", varTag, 6}, {"loong64HasLAM_BH", varTag, 6},
{"loong64HasLSX", varTag, 6}, {"loong64HasLSX", varTag, 6},
{"riscv64HasZbb", varTag, 6},
{"asanregisterglobals", funcTag, 130}, {"asanregisterglobals", funcTag, 130},
} }

View File

@ -221,6 +221,7 @@ var builtins = [...]struct {
{"runtime.loong64HasLAMCAS", 0}, {"runtime.loong64HasLAMCAS", 0},
{"runtime.loong64HasLAM_BH", 0}, {"runtime.loong64HasLAM_BH", 0},
{"runtime.loong64HasLSX", 0}, {"runtime.loong64HasLSX", 0},
{"runtime.riscv64HasZbb", 0},
{"runtime.asanregisterglobals", 1}, {"runtime.asanregisterglobals", 1},
{"runtime.deferproc", 1}, {"runtime.deferproc", 1},
{"runtime.deferprocStack", 1}, {"runtime.deferprocStack", 1},

View File

@ -145,6 +145,7 @@ var RISCV64 struct {
_ CacheLinePad _ CacheLinePad
HasFastMisaligned bool // Fast misaligned accesses HasFastMisaligned bool // Fast misaligned accesses
HasV bool // Vector extension compatible with RVV 1.0 HasV bool // Vector extension compatible with RVV 1.0
HasZbb bool // Basic bit-manipulation extension
_ CacheLinePad _ CacheLinePad
} }

View File

@ -12,6 +12,7 @@ func doinit() {
options = []option{ options = []option{
{Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned},
{Name: "v", Feature: &RISCV64.HasV}, {Name: "v", Feature: &RISCV64.HasV},
{Name: "zbb", Feature: &RISCV64.HasZbb},
} }
osInit() osInit()
} }

View File

@ -50,6 +50,7 @@ const (
// Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4
riscv_HWPROBE_IMA_V = 0x4 riscv_HWPROBE_IMA_V = 0x4
riscv_HWPROBE_EXT_ZBB = 0x10
riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 riscv_HWPROBE_KEY_CPUPERF_0 = 0x5
riscv_HWPROBE_MISALIGNED_FAST = 0x3 riscv_HWPROBE_MISALIGNED_FAST = 0x3
riscv_HWPROBE_MISALIGNED_MASK = 0x7 riscv_HWPROBE_MISALIGNED_MASK = 0x7
@ -83,6 +84,7 @@ func osInit() {
if pairs[0].key != -1 { if pairs[0].key != -1 {
v := uint(pairs[0].value) v := uint(pairs[0].value)
RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V)
RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB)
} }
if pairs[1].key != -1 { if pairs[1].key != -1 {
v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK

View File

@ -38,4 +38,6 @@ var (
loong64HasLAMCAS bool loong64HasLAMCAS bool
loong64HasLAM_BH bool loong64HasLAM_BH bool
loong64HasLSX bool loong64HasLSX bool
riscv64HasZbb bool
) )

View File

@ -778,6 +778,9 @@ func cpuinit(env string) {
loong64HasLAMCAS = cpu.Loong64.HasLAMCAS loong64HasLAMCAS = cpu.Loong64.HasLAMCAS
loong64HasLAM_BH = cpu.Loong64.HasLAM_BH loong64HasLAM_BH = cpu.Loong64.HasLAM_BH
loong64HasLSX = cpu.Loong64.HasLSX loong64HasLSX = cpu.Loong64.HasLSX
case "riscv64":
riscv64HasZbb = cpu.RISCV64.HasZbb
} }
} }

View File

@ -181,8 +181,9 @@ func OnesCount(n uint) int {
// amd64:"POPCNTQ" // amd64:"POPCNTQ"
// arm64:"VCNT","VUADDLV" // arm64:"VCNT","VUADDLV"
// loong64:"VPCNTV" // loong64:"VPCNTV"
// s390x:"POPCNT"
// ppc64x:"POPCNTD" // ppc64x:"POPCNTD"
// riscv64:"CPOP\t"
// s390x:"POPCNT"
// wasm:"I64Popcnt" // wasm:"I64Popcnt"
return bits.OnesCount(n) return bits.OnesCount(n)
} }
@ -192,8 +193,9 @@ func OnesCount64(n uint64) int {
// amd64:"POPCNTQ" // amd64:"POPCNTQ"
// arm64:"VCNT","VUADDLV" // arm64:"VCNT","VUADDLV"
// loong64:"VPCNTV" // loong64:"VPCNTV"
// s390x:"POPCNT"
// ppc64x:"POPCNTD" // ppc64x:"POPCNTD"
// riscv64:"CPOP\t"
// s390x:"POPCNT"
// wasm:"I64Popcnt" // wasm:"I64Popcnt"
return bits.OnesCount64(n) return bits.OnesCount64(n)
} }
@ -203,8 +205,9 @@ func OnesCount32(n uint32) int {
// amd64:"POPCNTL" // amd64:"POPCNTL"
// arm64:"VCNT","VUADDLV" // arm64:"VCNT","VUADDLV"
// loong64:"VPCNTW" // loong64:"VPCNTW"
// s390x:"POPCNT"
// ppc64x:"POPCNTW" // ppc64x:"POPCNTW"
// riscv64:"CPOPW"
// s390x:"POPCNT"
// wasm:"I64Popcnt" // wasm:"I64Popcnt"
return bits.OnesCount32(n) return bits.OnesCount32(n)
} }
@ -214,15 +217,17 @@ func OnesCount16(n uint16) int {
// amd64:"POPCNTL" // amd64:"POPCNTL"
// arm64:"VCNT","VUADDLV" // arm64:"VCNT","VUADDLV"
// loong64:"VPCNTH" // loong64:"VPCNTH"
// s390x:"POPCNT"
// ppc64x:"POPCNTW" // ppc64x:"POPCNTW"
// riscv64:"CPOP\t"
// s390x:"POPCNT"
// wasm:"I64Popcnt" // wasm:"I64Popcnt"
return bits.OnesCount16(n) return bits.OnesCount16(n)
} }
func OnesCount8(n uint8) int { func OnesCount8(n uint8) int {
// s390x:"POPCNT"
// ppc64x:"POPCNTB" // ppc64x:"POPCNTB"
// riscv64/rva22u64,riscv64/rva23u64:"CPOP\t"
// s390x:"POPCNT"
// wasm:"I64Popcnt" // wasm:"I64Popcnt"
return bits.OnesCount8(n) return bits.OnesCount8(n)
} }