go/test/codegen/comparisons.go
Ben Shi 0e9f1de0b7 cmd/compile: optimize arm64's comparison
Add more optimization with TST/CMN.

1. A tiny benchmark shows more than 12% improvement.
TSTCMN-4                    378µs ± 0%     332µs ± 0%  -12.15%  (p=0.000 n=30+27)
(https://github.com/benshi001/ugo1/blob/master/tstcmn_test.go)

2. There is little regression in the go1 benchmark, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              19.1s ± 0%     19.1s ± 0%    ~     (p=0.994 n=28+29)
Fannkuch11-4                10.0s ± 0%     10.0s ± 0%    ~     (p=0.198 n=30+25)
FmtFprintfEmpty-4           233ns ± 0%     233ns ± 0%  +0.14%  (p=0.002 n=24+30)
FmtFprintfString-4          428ns ± 0%     428ns ± 0%    ~     (all equal)
FmtFprintfInt-4             472ns ± 0%     472ns ± 0%    ~     (all equal)
FmtFprintfIntInt-4          725ns ± 0%     725ns ± 0%    ~     (all equal)
FmtFprintfPrefixedInt-4     889ns ± 0%     888ns ± 0%    ~     (p=0.632 n=28+30)
FmtFprintfFloat-4          1.20µs ± 0%    1.20µs ± 0%  +0.05%  (p=0.001 n=18+30)
FmtManyArgs-4              3.00µs ± 0%    2.99µs ± 0%  -0.07%  (p=0.001 n=27+30)
GobDecode-4                42.1ms ± 0%    42.2ms ± 0%  +0.29%  (p=0.000 n=28+28)
GobEncode-4                38.6ms ± 9%    38.8ms ± 9%    ~     (p=0.912 n=30+30)
Gzip-4                      2.07s ± 1%     2.05s ± 1%  -0.64%  (p=0.000 n=29+30)
Gunzip-4                    175ms ± 0%     175ms ± 0%  -0.15%  (p=0.001 n=30+30)
HTTPClientServer-4          872µs ± 5%     880µs ± 6%    ~     (p=0.196 n=30+29)
JSONEncode-4               88.5ms ± 1%    89.8ms ± 1%  +1.49%  (p=0.000 n=23+24)
JSONDecode-4                393ms ± 1%     390ms ± 1%  -0.89%  (p=0.000 n=28+30)
Mandelbrot200-4            19.5ms ± 0%    19.5ms ± 0%    ~     (p=0.405 n=29+28)
GoParse-4                  19.9ms ± 0%    20.0ms ± 0%  +0.27%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4       431ns ± 0%     431ns ± 0%    ~     (p=1.000 n=30+30)
RegexpMatchEasy0_1K-4      1.61µs ± 0%    1.61µs ± 0%    ~     (p=0.527 n=26+26)
RegexpMatchEasy1_32-4       443ns ± 0%     443ns ± 0%    ~     (all equal)
RegexpMatchEasy1_1K-4      2.58µs ± 1%    2.58µs ± 1%    ~     (p=0.578 n=27+25)
RegexpMatchMedium_32-4      740ns ± 0%     740ns ± 0%    ~     (p=0.357 n=30+30)
RegexpMatchMedium_1K-4      223µs ± 0%     223µs ± 0%  +0.16%  (p=0.000 n=30+29)
RegexpMatchHard_32-4       12.3µs ± 0%    12.3µs ± 0%    ~     (p=0.236 n=27+27)
RegexpMatchHard_1K-4        371µs ± 0%     371µs ± 0%  +0.09%  (p=0.000 n=30+27)
Revcomp-4                   2.85s ± 0%     2.85s ± 0%    ~     (p=0.057 n=28+25)
Template-4                  408ms ± 1%     409ms ± 1%    ~     (p=0.117 n=29+29)
TimeParse-4                1.93µs ± 0%    1.93µs ± 0%    ~     (p=0.535 n=29+28)
TimeFormat-4               1.99µs ± 0%    1.99µs ± 0%    ~     (p=0.168 n=29+28)
[Geo mean]                  306µs          307µs       +0.07%

name                     old speed      new speed      delta
GobDecode-4              18.3MB/s ± 0%  18.2MB/s ± 0%  -0.31%  (p=0.000 n=28+29)
GobEncode-4              19.9MB/s ± 8%  19.8MB/s ± 9%    ~     (p=0.923 n=30+30)
Gzip-4                   9.39MB/s ± 1%  9.45MB/s ± 1%  +0.65%  (p=0.000 n=29+30)
Gunzip-4                  111MB/s ± 0%   111MB/s ± 0%  +0.15%  (p=0.001 n=30+30)
JSONEncode-4             21.9MB/s ± 1%  21.6MB/s ± 1%  -1.45%  (p=0.000 n=23+23)
JSONDecode-4             4.94MB/s ± 1%  4.98MB/s ± 1%  +0.84%  (p=0.000 n=27+30)
GoParse-4                2.91MB/s ± 0%  2.90MB/s ± 0%  -0.34%  (p=0.000 n=21+22)
RegexpMatchEasy0_32-4    74.1MB/s ± 0%  74.1MB/s ± 0%    ~     (p=0.469 n=29+28)
RegexpMatchEasy0_1K-4     634MB/s ± 0%   634MB/s ± 0%    ~     (p=0.978 n=24+28)
RegexpMatchEasy1_32-4    72.2MB/s ± 0%  72.2MB/s ± 0%    ~     (p=0.064 n=27+29)
RegexpMatchEasy1_1K-4     396MB/s ± 1%   396MB/s ± 1%    ~     (p=0.583 n=27+25)
RegexpMatchMedium_32-4   1.35MB/s ± 0%  1.35MB/s ± 0%    ~     (all equal)
RegexpMatchMedium_1K-4   4.60MB/s ± 0%  4.59MB/s ± 0%  -0.14%  (p=0.000 n=30+26)
RegexpMatchHard_32-4     2.61MB/s ± 0%  2.61MB/s ± 0%    ~     (all equal)
RegexpMatchHard_1K-4     2.76MB/s ± 0%  2.76MB/s ± 0%    ~     (all equal)
Revcomp-4                89.1MB/s ± 0%  89.1MB/s ± 0%    ~     (p=0.059 n=28+25)
Template-4               4.75MB/s ± 1%  4.75MB/s ± 1%    ~     (p=0.106 n=29+29)
[Geo mean]               18.3MB/s       18.3MB/s       -0.07%

Change-Id: I3cd76ce63e84b0c3cebabf9fa3573b76a7343899
Reviewed-on: https://go-review.googlesource.com/124935
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-09-05 02:51:28 +00:00

198 lines
4.0 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
import "unsafe"
// This file contains code generation tests related to the comparison
// operators.
// -------------- //
// Equality //
// -------------- //
// Check that compare to constant string use 2/4/8 byte compares
func CompareString1(s string) bool {
// amd64:`CMPW\t\(.*\), [$]`
// arm64:`MOVHU\t\(.*\), [R]`,`CMPW\t[$]`
// ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
// s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
return s == "xx"
}
func CompareString2(s string) bool {
// amd64:`CMPL\t\(.*\), [$]`
// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
return s == "xxxx"
}
func CompareString3(s string) bool {
// amd64:`CMPQ\t\(.*\), [A-Z]`
// arm64:-`CMPW\t`
// ppc64le:-`CMPW\t`
// s390x:-`CMPW\t`
return s == "xxxxxxxx"
}
// Check that arrays compare use 2/4/8 byte compares
func CompareArray1(a, b [2]byte) bool {
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
// arm64:-`MOVBU\t`
// ppc64le:-`MOVBZ\t`
// s390x:-`MOVBZ\t`
return a == b
}
func CompareArray2(a, b [3]uint16) bool {
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
return a == b
}
func CompareArray3(a, b [3]int16) bool {
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
return a == b
}
func CompareArray4(a, b [12]int8) bool {
// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
return a == b
}
func CompareArray5(a, b [15]byte) bool {
// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
return a == b
}
// This was a TODO in mapaccess1_faststr
func CompareArray6(a, b unsafe.Pointer) bool {
// amd64:`CMPL\t\(.*\), [A-Z]`
// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]`
return *((*[4]byte)(a)) != *((*[4]byte)(b))
}
// -------------- //
// Ordering //
// -------------- //
// Test that LEAQ/ADDQconst are folded into SETx ops
func CmpFold(x uint32) bool {
// amd64:`SETHI\t.*\(SP\)`
return x > 4
}
// Test that direct comparisons with memory are generated when
// possible
func CmpMem1(p int, q *int) bool {
// amd64:`CMPQ\t\(.*\), [A-Z]`
return p < *q
}
func CmpMem2(p *int, q int) bool {
// amd64:`CMPQ\t\(.*\), [A-Z]`
return *p < q
}
func CmpMem3(p *int) bool {
// amd64:`CMPQ\t\(.*\), [$]7`
return *p < 7
}
func CmpMem4(p *int) bool {
// amd64:`CMPQ\t\(.*\), [$]7`
return 7 < *p
}
func CmpMem5(p **int) {
// amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0`
*p = nil
}
func CmpMem6(a []int) int {
// 386:`CMPL\s8\([A-Z]+\),`
// amd64:`CMPQ\s16\([A-Z]+\),`
if a[1] > a[2] {
return 1
} else {
return 2
}
}
// Check tbz/tbnz are generated when comparing against zero on arm64
func CmpZero1(a int32, ptr *int) {
if a < 0 { // arm64:"TBZ"
*ptr = 0
}
}
func CmpZero2(a int64, ptr *int) {
if a < 0 { // arm64:"TBZ"
*ptr = 0
}
}
func CmpZero3(a int32, ptr *int) {
if a >= 0 { // arm64:"TBNZ"
*ptr = 0
}
}
func CmpZero4(a int64, ptr *int) {
if a >= 0 { // arm64:"TBNZ"
*ptr = 0
}
}
func CmpToZero(a, b, d int32) int32 {
// arm:`TST`,-`AND`
// arm64:`TSTW`,-`AND`
c0 := a&b < 0
// arm:`CMN`,-`ADD`
// arm64:`CMNW`,-`ADD`
c1 := a+b < 0
// arm:`TEQ`,-`XOR`
c2 := a^b < 0
// arm64:`TST`,-`AND`
c3 := int64(a)&int64(b) < 0
// arm64:`CMN`,-`ADD`
c4 := int64(a)+int64(b) < 0
// not optimized to CMNW/CMN due to further use of b+d
// arm64:`ADD`,-`CMNW`
c5 := b+d == 0
// not optimized to TSTW/TST due to further use of a&d
// arm64:`AND`,-`TSTW`
c6 := a&d >= 0
if c0 {
return 1
} else if c1 {
return 2
} else if c2 {
return 3
} else if c3 {
return 4
} else if c4 {
return 5
} else if c5 {
return b + d
} else if c6 {
return a & d
} else {
return 0
}
}