mirror of
https://github.com/golang/go.git
synced 2025-05-07 00:23:03 +00:00
Add more optimization with TST/CMN. 1. A tiny benchmark shows more than 12% improvement. TSTCMN-4 378µs ± 0% 332µs ± 0% -12.15% (p=0.000 n=30+27) (https://github.com/benshi001/ugo1/blob/master/tstcmn_test.go) 2. There is little regression in the go1 benchmark, excluding noise. name old time/op new time/op delta BinaryTree17-4 19.1s ± 0% 19.1s ± 0% ~ (p=0.994 n=28+29) Fannkuch11-4 10.0s ± 0% 10.0s ± 0% ~ (p=0.198 n=30+25) FmtFprintfEmpty-4 233ns ± 0% 233ns ± 0% +0.14% (p=0.002 n=24+30) FmtFprintfString-4 428ns ± 0% 428ns ± 0% ~ (all equal) FmtFprintfInt-4 472ns ± 0% 472ns ± 0% ~ (all equal) FmtFprintfIntInt-4 725ns ± 0% 725ns ± 0% ~ (all equal) FmtFprintfPrefixedInt-4 889ns ± 0% 888ns ± 0% ~ (p=0.632 n=28+30) FmtFprintfFloat-4 1.20µs ± 0% 1.20µs ± 0% +0.05% (p=0.001 n=18+30) FmtManyArgs-4 3.00µs ± 0% 2.99µs ± 0% -0.07% (p=0.001 n=27+30) GobDecode-4 42.1ms ± 0% 42.2ms ± 0% +0.29% (p=0.000 n=28+28) GobEncode-4 38.6ms ± 9% 38.8ms ± 9% ~ (p=0.912 n=30+30) Gzip-4 2.07s ± 1% 2.05s ± 1% -0.64% (p=0.000 n=29+30) Gunzip-4 175ms ± 0% 175ms ± 0% -0.15% (p=0.001 n=30+30) HTTPClientServer-4 872µs ± 5% 880µs ± 6% ~ (p=0.196 n=30+29) JSONEncode-4 88.5ms ± 1% 89.8ms ± 1% +1.49% (p=0.000 n=23+24) JSONDecode-4 393ms ± 1% 390ms ± 1% -0.89% (p=0.000 n=28+30) Mandelbrot200-4 19.5ms ± 0% 19.5ms ± 0% ~ (p=0.405 n=29+28) GoParse-4 19.9ms ± 0% 20.0ms ± 0% +0.27% (p=0.000 n=30+30) RegexpMatchEasy0_32-4 431ns ± 0% 431ns ± 0% ~ (p=1.000 n=30+30) RegexpMatchEasy0_1K-4 1.61µs ± 0% 1.61µs ± 0% ~ (p=0.527 n=26+26) RegexpMatchEasy1_32-4 443ns ± 0% 443ns ± 0% ~ (all equal) RegexpMatchEasy1_1K-4 2.58µs ± 1% 2.58µs ± 1% ~ (p=0.578 n=27+25) RegexpMatchMedium_32-4 740ns ± 0% 740ns ± 0% ~ (p=0.357 n=30+30) RegexpMatchMedium_1K-4 223µs ± 0% 223µs ± 0% +0.16% (p=0.000 n=30+29) RegexpMatchHard_32-4 12.3µs ± 0% 12.3µs ± 0% ~ (p=0.236 n=27+27) RegexpMatchHard_1K-4 371µs ± 0% 371µs ± 0% +0.09% (p=0.000 n=30+27) Revcomp-4 2.85s ± 0% 2.85s ± 0% ~ (p=0.057 n=28+25) Template-4 408ms ± 1% 409ms ± 1% ~ (p=0.117 n=29+29) TimeParse-4 1.93µs ± 0% 1.93µs ± 0% ~ (p=0.535 n=29+28) TimeFormat-4 1.99µs ± 0% 1.99µs ± 0% ~ (p=0.168 n=29+28) [Geo mean] 306µs 307µs +0.07% name old speed new speed delta GobDecode-4 18.3MB/s ± 0% 18.2MB/s ± 0% -0.31% (p=0.000 n=28+29) GobEncode-4 19.9MB/s ± 8% 19.8MB/s ± 9% ~ (p=0.923 n=30+30) Gzip-4 9.39MB/s ± 1% 9.45MB/s ± 1% +0.65% (p=0.000 n=29+30) Gunzip-4 111MB/s ± 0% 111MB/s ± 0% +0.15% (p=0.001 n=30+30) JSONEncode-4 21.9MB/s ± 1% 21.6MB/s ± 1% -1.45% (p=0.000 n=23+23) JSONDecode-4 4.94MB/s ± 1% 4.98MB/s ± 1% +0.84% (p=0.000 n=27+30) GoParse-4 2.91MB/s ± 0% 2.90MB/s ± 0% -0.34% (p=0.000 n=21+22) RegexpMatchEasy0_32-4 74.1MB/s ± 0% 74.1MB/s ± 0% ~ (p=0.469 n=29+28) RegexpMatchEasy0_1K-4 634MB/s ± 0% 634MB/s ± 0% ~ (p=0.978 n=24+28) RegexpMatchEasy1_32-4 72.2MB/s ± 0% 72.2MB/s ± 0% ~ (p=0.064 n=27+29) RegexpMatchEasy1_1K-4 396MB/s ± 1% 396MB/s ± 1% ~ (p=0.583 n=27+25) RegexpMatchMedium_32-4 1.35MB/s ± 0% 1.35MB/s ± 0% ~ (all equal) RegexpMatchMedium_1K-4 4.60MB/s ± 0% 4.59MB/s ± 0% -0.14% (p=0.000 n=30+26) RegexpMatchHard_32-4 2.61MB/s ± 0% 2.61MB/s ± 0% ~ (all equal) RegexpMatchHard_1K-4 2.76MB/s ± 0% 2.76MB/s ± 0% ~ (all equal) Revcomp-4 89.1MB/s ± 0% 89.1MB/s ± 0% ~ (p=0.059 n=28+25) Template-4 4.75MB/s ± 1% 4.75MB/s ± 1% ~ (p=0.106 n=29+29) [Geo mean] 18.3MB/s 18.3MB/s -0.07% Change-Id: I3cd76ce63e84b0c3cebabf9fa3573b76a7343899 Reviewed-on: https://go-review.googlesource.com/124935 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
198 lines
4.0 KiB
Go
198 lines
4.0 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "unsafe"
|
|
|
|
// This file contains code generation tests related to the comparison
|
|
// operators.
|
|
|
|
// -------------- //
|
|
// Equality //
|
|
// -------------- //
|
|
|
|
// Check that compare to constant string use 2/4/8 byte compares
|
|
|
|
func CompareString1(s string) bool {
|
|
// amd64:`CMPW\t\(.*\), [$]`
|
|
// arm64:`MOVHU\t\(.*\), [R]`,`CMPW\t[$]`
|
|
// ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
|
|
// s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
|
|
return s == "xx"
|
|
}
|
|
|
|
func CompareString2(s string) bool {
|
|
// amd64:`CMPL\t\(.*\), [$]`
|
|
// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
|
|
return s == "xxxx"
|
|
}
|
|
|
|
func CompareString3(s string) bool {
|
|
// amd64:`CMPQ\t\(.*\), [A-Z]`
|
|
// arm64:-`CMPW\t`
|
|
// ppc64le:-`CMPW\t`
|
|
// s390x:-`CMPW\t`
|
|
return s == "xxxxxxxx"
|
|
}
|
|
|
|
// Check that arrays compare use 2/4/8 byte compares
|
|
|
|
func CompareArray1(a, b [2]byte) bool {
|
|
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// arm64:-`MOVBU\t`
|
|
// ppc64le:-`MOVBZ\t`
|
|
// s390x:-`MOVBZ\t`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray2(a, b [3]uint16) bool {
|
|
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray3(a, b [3]int16) bool {
|
|
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray4(a, b [12]int8) bool {
|
|
// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
func CompareArray5(a, b [15]byte) bool {
|
|
// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
|
|
return a == b
|
|
}
|
|
|
|
// This was a TODO in mapaccess1_faststr
|
|
func CompareArray6(a, b unsafe.Pointer) bool {
|
|
// amd64:`CMPL\t\(.*\), [A-Z]`
|
|
// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]`
|
|
return *((*[4]byte)(a)) != *((*[4]byte)(b))
|
|
}
|
|
|
|
// -------------- //
|
|
// Ordering //
|
|
// -------------- //
|
|
|
|
// Test that LEAQ/ADDQconst are folded into SETx ops
|
|
|
|
func CmpFold(x uint32) bool {
|
|
// amd64:`SETHI\t.*\(SP\)`
|
|
return x > 4
|
|
}
|
|
|
|
// Test that direct comparisons with memory are generated when
|
|
// possible
|
|
|
|
func CmpMem1(p int, q *int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [A-Z]`
|
|
return p < *q
|
|
}
|
|
|
|
func CmpMem2(p *int, q int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [A-Z]`
|
|
return *p < q
|
|
}
|
|
|
|
func CmpMem3(p *int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [$]7`
|
|
return *p < 7
|
|
}
|
|
|
|
func CmpMem4(p *int) bool {
|
|
// amd64:`CMPQ\t\(.*\), [$]7`
|
|
return 7 < *p
|
|
}
|
|
|
|
func CmpMem5(p **int) {
|
|
// amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0`
|
|
*p = nil
|
|
}
|
|
|
|
func CmpMem6(a []int) int {
|
|
// 386:`CMPL\s8\([A-Z]+\),`
|
|
// amd64:`CMPQ\s16\([A-Z]+\),`
|
|
if a[1] > a[2] {
|
|
return 1
|
|
} else {
|
|
return 2
|
|
}
|
|
}
|
|
|
|
// Check tbz/tbnz are generated when comparing against zero on arm64
|
|
|
|
func CmpZero1(a int32, ptr *int) {
|
|
if a < 0 { // arm64:"TBZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpZero2(a int64, ptr *int) {
|
|
if a < 0 { // arm64:"TBZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpZero3(a int32, ptr *int) {
|
|
if a >= 0 { // arm64:"TBNZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpZero4(a int64, ptr *int) {
|
|
if a >= 0 { // arm64:"TBNZ"
|
|
*ptr = 0
|
|
}
|
|
}
|
|
|
|
func CmpToZero(a, b, d int32) int32 {
|
|
// arm:`TST`,-`AND`
|
|
// arm64:`TSTW`,-`AND`
|
|
c0 := a&b < 0
|
|
// arm:`CMN`,-`ADD`
|
|
// arm64:`CMNW`,-`ADD`
|
|
c1 := a+b < 0
|
|
// arm:`TEQ`,-`XOR`
|
|
c2 := a^b < 0
|
|
// arm64:`TST`,-`AND`
|
|
c3 := int64(a)&int64(b) < 0
|
|
// arm64:`CMN`,-`ADD`
|
|
c4 := int64(a)+int64(b) < 0
|
|
// not optimized to CMNW/CMN due to further use of b+d
|
|
// arm64:`ADD`,-`CMNW`
|
|
c5 := b+d == 0
|
|
// not optimized to TSTW/TST due to further use of a&d
|
|
// arm64:`AND`,-`TSTW`
|
|
c6 := a&d >= 0
|
|
if c0 {
|
|
return 1
|
|
} else if c1 {
|
|
return 2
|
|
} else if c2 {
|
|
return 3
|
|
} else if c3 {
|
|
return 4
|
|
} else if c4 {
|
|
return 5
|
|
} else if c5 {
|
|
return b + d
|
|
} else if c6 {
|
|
return a & d
|
|
} else {
|
|
return 0
|
|
}
|
|
}
|