mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
This CL optimize amd64's code: "ADDQ $-1, MEM_OP" -> "DECQ MEM_OP" "ADDL $-1, MEM_OP" -> "DECL MEM_OP" 1. The total size of pkg/linux_amd64 (excluding cmd/compile) decreases about 0.1KB. 2. The go1 benchmark shows little regression, excluding noise. name old time/op new time/op delta BinaryTree17-4 2.60s ± 5% 2.64s ± 3% +1.53% (p=0.000 n=38+39) Fannkuch11-4 2.37s ± 2% 2.38s ± 2% ~ (p=0.950 n=40+40) FmtFprintfEmpty-4 40.4ns ± 5% 40.5ns ± 5% ~ (p=0.711 n=40+40) FmtFprintfString-4 72.4ns ± 5% 72.3ns ± 3% ~ (p=0.485 n=40+40) FmtFprintfInt-4 79.7ns ± 3% 80.1ns ± 3% ~ (p=0.124 n=40+40) FmtFprintfIntInt-4 126ns ± 3% 127ns ± 3% +0.71% (p=0.027 n=40+40) FmtFprintfPrefixedInt-4 153ns ± 4% 153ns ± 2% ~ (p=0.604 n=40+40) FmtFprintfFloat-4 206ns ± 5% 210ns ± 5% +1.79% (p=0.002 n=40+40) FmtManyArgs-4 498ns ± 3% 496ns ± 3% ~ (p=0.099 n=40+40) GobDecode-4 6.48ms ± 6% 6.47ms ± 7% ~ (p=0.686 n=39+40) GobEncode-4 5.95ms ± 7% 5.96ms ± 6% ~ (p=0.670 n=40+34) Gzip-4 224ms ± 6% 223ms ± 5% ~ (p=0.143 n=40+40) Gunzip-4 36.5ms ± 4% 36.5ms ± 4% ~ (p=0.556 n=40+40) HTTPClientServer-4 60.7µs ± 2% 59.9µs ± 3% -1.20% (p=0.000 n=39+39) JSONEncode-4 9.03ms ± 4% 9.04ms ± 4% ~ (p=0.589 n=40+40) JSONDecode-4 49.4ms ± 4% 49.2ms ± 4% ~ (p=0.276 n=40+40) Mandelbrot200-4 3.80ms ± 4% 3.79ms ± 4% ~ (p=0.837 n=40+40) GoParse-4 3.15ms ± 5% 3.13ms ± 5% ~ (p=0.240 n=40+40) RegexpMatchEasy0_32-4 72.9ns ± 3% 72.0ns ± 8% -1.25% (p=0.003 n=40+40) RegexpMatchEasy0_1K-4 229ns ± 5% 230ns ± 4% ~ (p=0.318 n=40+40) RegexpMatchEasy1_32-4 66.9ns ± 3% 67.3ns ± 7% ~ (p=0.817 n=40+40) RegexpMatchEasy1_1K-4 371ns ± 5% 370ns ± 4% ~ (p=0.275 n=40+40) RegexpMatchMedium_32-4 106ns ± 4% 104ns ± 7% -2.28% (p=0.000 n=40+40) RegexpMatchMedium_1K-4 32.0µs ± 2% 31.4µs ± 3% -2.08% (p=0.000 n=40+40) RegexpMatchHard_32-4 1.54µs ± 7% 1.52µs ± 3% -1.80% (p=0.007 n=39+40) RegexpMatchHard_1K-4 45.8µs ± 4% 45.5µs ± 3% ~ (p=0.707 n=40+40) Revcomp-4 401ms ± 5% 401ms ± 6% ~ (p=0.935 n=40+40) Template-4 62.4ms ± 4% 61.2ms ± 3% -1.85% (p=0.000 n=40+40) TimeParse-4 315ns ± 2% 318ns ± 3% +1.10% (p=0.002 n=40+40) TimeFormat-4 297ns ± 3% 298ns ± 3% ~ (p=0.238 n=40+40) [Geo mean] 45.8µs 45.7µs -0.22% name old speed new speed delta GobDecode-4 119MB/s ± 6% 119MB/s ± 7% ~ (p=0.684 n=39+40) GobEncode-4 129MB/s ± 7% 128MB/s ± 6% ~ (p=0.413 n=40+34) Gzip-4 86.6MB/s ± 6% 87.0MB/s ± 6% ~ (p=0.145 n=40+40) Gunzip-4 532MB/s ± 4% 532MB/s ± 4% ~ (p=0.556 n=40+40) JSONEncode-4 215MB/s ± 4% 215MB/s ± 4% ~ (p=0.583 n=40+40) JSONDecode-4 39.3MB/s ± 4% 39.5MB/s ± 4% ~ (p=0.277 n=40+40) GoParse-4 18.4MB/s ± 5% 18.5MB/s ± 5% ~ (p=0.229 n=40+40) RegexpMatchEasy0_32-4 439MB/s ± 3% 445MB/s ± 8% +1.28% (p=0.003 n=40+40) RegexpMatchEasy0_1K-4 4.46GB/s ± 4% 4.45GB/s ± 4% ~ (p=0.343 n=40+40) RegexpMatchEasy1_32-4 479MB/s ± 3% 476MB/s ± 7% ~ (p=0.855 n=40+40) RegexpMatchEasy1_1K-4 2.76GB/s ± 5% 2.77GB/s ± 4% ~ (p=0.250 n=40+40) RegexpMatchMedium_32-4 9.36MB/s ± 4% 9.58MB/s ± 6% +2.31% (p=0.001 n=40+40) RegexpMatchMedium_1K-4 32.0MB/s ± 2% 32.7MB/s ± 3% +2.12% (p=0.000 n=40+40) RegexpMatchHard_32-4 20.7MB/s ± 7% 21.1MB/s ± 3% +1.95% (p=0.005 n=40+40) RegexpMatchHard_1K-4 22.4MB/s ± 4% 22.5MB/s ± 3% ~ (p=0.689 n=40+40) Revcomp-4 634MB/s ± 5% 634MB/s ± 6% ~ (p=0.935 n=40+40) Template-4 31.1MB/s ± 3% 31.7MB/s ± 3% +1.88% (p=0.000 n=40+40) [Geo mean] 129MB/s 130MB/s +0.62% Change-Id: I9d61ee810d900920c572cbe89e2f1626bfed12b7 Reviewed-on: https://go-review.googlesource.com/c/145209 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
348 lines
7.2 KiB
Go
348 lines
7.2 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// This file contains codegen tests related to arithmetic
|
|
// simplifications and optimizations on integer types.
|
|
// For codegen tests on float types, see floats.go.
|
|
|
|
// ----------------- //
|
|
// Subtraction //
|
|
// ----------------- //
|
|
|
|
var ef int
|
|
func SubMem(arr []int, b, c, d int) int {
|
|
// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
|
|
// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
|
|
arr[2] -= b
|
|
// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
|
|
// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
|
|
arr[3] -= b
|
|
// 386:`DECL\s16\([A-Z]+\)`
|
|
arr[4]--
|
|
// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
|
|
arr[5] -= 20
|
|
// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
|
|
ef -= arr[b]
|
|
// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
|
|
arr[c] -= b
|
|
// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
|
|
arr[d] -= 15
|
|
// 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
|
|
arr[b]--
|
|
// amd64:`DECQ\s64\([A-Z]+\)`
|
|
arr[8]--
|
|
// 386:"SUBL\t4"
|
|
// amd64:"SUBQ\t8"
|
|
return arr[0] - arr[1]
|
|
}
|
|
|
|
// -------------------- //
|
|
// Multiplication //
|
|
// -------------------- //
|
|
|
|
func Pow2Muls(n1, n2 int) (int, int) {
|
|
// amd64:"SHLQ\t[$]5",-"IMULQ"
|
|
// 386:"SHLL\t[$]5",-"IMULL"
|
|
// arm:"SLL\t[$]5",-"MUL"
|
|
// arm64:"LSL\t[$]5",-"MUL"
|
|
// ppc64:"SLD\t[$]5",-"MUL"
|
|
// ppc64le:"SLD\t[$]5",-"MUL"
|
|
a := n1 * 32
|
|
|
|
// amd64:"SHLQ\t[$]6",-"IMULQ"
|
|
// 386:"SHLL\t[$]6",-"IMULL"
|
|
// arm:"SLL\t[$]6",-"MUL"
|
|
// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
|
|
// ppc64:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
|
|
// ppc64le:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
|
|
b := -64 * n2
|
|
|
|
return a, b
|
|
}
|
|
|
|
func Mul_96(n int) int {
|
|
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`
|
|
return n * 96
|
|
}
|
|
|
|
func MulMemSrc(a []uint32, b []float32) {
|
|
// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
|
|
a[0] *= a[1]
|
|
// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
|
|
b[0] *= b[1]
|
|
}
|
|
|
|
// Multiplications merging tests
|
|
|
|
func MergeMuls1(n int) int {
|
|
// amd64:"IMUL3Q\t[$]46"
|
|
// 386:"IMUL3L\t[$]46"
|
|
return 15*n + 31*n // 46n
|
|
}
|
|
|
|
func MergeMuls2(n int) int {
|
|
// amd64:"IMUL3Q\t[$]23","ADDQ\t[$]29"
|
|
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
|
|
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
|
|
}
|
|
|
|
func MergeMuls3(a, n int) int {
|
|
// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
|
|
return a*n + 19*n // (a+19)n
|
|
}
|
|
|
|
func MergeMuls4(n int) int {
|
|
// amd64:"IMUL3Q\t[$]14"
|
|
// 386:"IMUL3L\t[$]14"
|
|
return 23*n - 9*n // 14n
|
|
}
|
|
|
|
func MergeMuls5(a, n int) int {
|
|
// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
|
|
return a*n - 19*n // (a-19)n
|
|
}
|
|
|
|
// -------------- //
|
|
// Division //
|
|
// -------------- //
|
|
|
|
func DivMemSrc(a []float64) {
|
|
// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
|
|
a[0] /= a[1]
|
|
}
|
|
|
|
func Pow2Divs(n1 uint, n2 int) (uint, int) {
|
|
// 386:"SHRL\t[$]5",-"DIVL"
|
|
// amd64:"SHRQ\t[$]5",-"DIVQ"
|
|
// arm:"SRL\t[$]5",-".*udiv"
|
|
// arm64:"LSR\t[$]5",-"UDIV"
|
|
// ppc64:"SRD"
|
|
// ppc64le:"SRD"
|
|
a := n1 / 32 // unsigned
|
|
|
|
// amd64:"SARQ\t[$]6",-"IDIVQ"
|
|
// 386:"SARL\t[$]6",-"IDIVL"
|
|
// arm:"SRA\t[$]6",-".*udiv"
|
|
// arm64:"ASR\t[$]6",-"SDIV"
|
|
// ppc64:"SRAD"
|
|
// ppc64le:"SRAD"
|
|
b := n2 / 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant divisions get turned into MULs
|
|
func ConstDivs(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
a := n1 / 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
b := n2 / 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
func FloatDivs(a []float32) float32 {
|
|
// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
|
|
return a[1] / a[2]
|
|
}
|
|
|
|
func Pow2Mods(n1 uint, n2 int) (uint, int) {
|
|
// 386:"ANDL\t[$]31",-"DIVL"
|
|
// amd64:"ANDQ\t[$]31",-"DIVQ"
|
|
// arm:"AND\t[$]31",-".*udiv"
|
|
// arm64:"AND\t[$]31",-"UDIV"
|
|
// ppc64:"ANDCC\t[$]31"
|
|
// ppc64le:"ANDCC\t[$]31"
|
|
a := n1 % 32 // unsigned
|
|
|
|
// 386:-"IDIVL"
|
|
// amd64:-"IDIVQ"
|
|
// arm:-".*udiv"
|
|
// arm64:-"REM"
|
|
b := n2 % 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant modulo divs get turned into MULs
|
|
func ConstMods(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
a := n1 % 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
b := n2 % 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that fix-up code is not generated for divisions where it has been proven that
|
|
// that the divisor is not -1 or that the dividend is > MinIntNN.
|
|
func NoFix64A(divr int64) (int64, int64) {
|
|
var d int64 = 42
|
|
var e int64 = 84
|
|
if divr > 5 {
|
|
d /= divr // amd64:-"JMP"
|
|
e %= divr // amd64:-"JMP"
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix64B(divd int64) (int64, int64) {
|
|
var d int64
|
|
var e int64
|
|
var divr int64 = -1
|
|
if divd > -9223372036854775808 {
|
|
d = divd / divr // amd64:-"JMP"
|
|
e = divd % divr // amd64:-"JMP"
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix32A(divr int32) (int32, int32) {
|
|
var d int32 = 42
|
|
var e int32 = 84
|
|
if divr > 5 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d /= divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e %= divr
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix32B(divd int32) (int32, int32) {
|
|
var d int32
|
|
var e int32
|
|
var divr int32 = -1
|
|
if divd > -2147483648 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d = divd / divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e = divd % divr
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix16A(divr int16) (int16, int16) {
|
|
var d int16 = 42
|
|
var e int16 = 84
|
|
if divr > 5 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d /= divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e %= divr
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix16B(divd int16) (int16, int16) {
|
|
var d int16
|
|
var e int16
|
|
var divr int16 = -1
|
|
if divd > -32768 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d = divd / divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e = divd % divr
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
// Check that len() and cap() calls divided by powers of two are
|
|
// optimized into shifts and ands
|
|
|
|
func LenDiv1(a []int) int {
|
|
// 386:"SHRL\t[$]10"
|
|
// amd64:"SHRQ\t[$]10"
|
|
// ppc64:"SRD"\t[$]10"
|
|
// ppc64le:"SRD"\t[$]10"
|
|
return len(a) / 1024
|
|
}
|
|
|
|
func LenDiv2(s string) int {
|
|
// 386:"SHRL\t[$]11"
|
|
// amd64:"SHRQ\t[$]11"
|
|
// ppc64:"SRD\t[$]11"
|
|
// ppc64le:"SRD\t[$]11"
|
|
return len(s) / (4097 >> 1)
|
|
}
|
|
|
|
func LenMod1(a []int) int {
|
|
// 386:"ANDL\t[$]1023"
|
|
// amd64:"ANDQ\t[$]1023"
|
|
// ppc64:"ANDCC\t[$]1023"
|
|
// ppc64le:"ANDCC\t[$]1023"
|
|
return len(a) % 1024
|
|
}
|
|
|
|
func LenMod2(s string) int {
|
|
// 386:"ANDL\t[$]2047"
|
|
// amd64:"ANDQ\t[$]2047"
|
|
// ppc64:"ANDCC\t[$]2047"
|
|
// ppc64le:"ANDCC\t[$]2047"
|
|
return len(s) % (4097 >> 1)
|
|
}
|
|
|
|
func CapDiv(a []int) int {
|
|
// 386:"SHRL\t[$]12"
|
|
// amd64:"SHRQ\t[$]12"
|
|
// ppc64:"SRD\t[$]12"
|
|
// ppc64le:"SRD\t[$]12"
|
|
return cap(a) / ((1 << 11) + 2048)
|
|
}
|
|
|
|
func CapMod(a []int) int {
|
|
// 386:"ANDL\t[$]4095"
|
|
// amd64:"ANDQ\t[$]4095"
|
|
// ppc64:"ANDCC\t[$]4095"
|
|
// ppc64le:"ANDCC\t[$]4095"
|
|
return cap(a) % ((1 << 11) + 2048)
|
|
}
|
|
|
|
func AddMul(x int) int {
|
|
// amd64:"LEAQ\t1"
|
|
return 2*x + 1
|
|
}
|
|
|
|
func MULA(a, b, c uint32) (uint32, uint32, uint32) {
|
|
// arm:`MULA`,-`MUL\s`
|
|
// arm64:`MADDW`,-`MULW`
|
|
r0 := a*b + c
|
|
// arm:`MULA`-`MUL\s`
|
|
// arm64:`MADDW`,-`MULW`
|
|
r1 := c*79 + a
|
|
// arm:`ADD`,-`MULA`-`MUL\s`
|
|
// arm64:`ADD`,-`MADD`,-`MULW`
|
|
r2 := b*64 + c
|
|
return r0, r1, r2
|
|
}
|
|
|
|
func MULS(a, b, c uint32) (uint32, uint32, uint32) {
|
|
// arm/7:`MULS`,-`MUL\s`
|
|
// arm64:`MSUBW`,-`MULW`
|
|
r0 := c - a*b
|
|
// arm/7:`MULS`-`MUL\s`
|
|
// arm64:`MSUBW`,-`MULW`
|
|
r1 := a - c*79
|
|
// arm/7:`SUB`,-`MULS`-`MUL\s`
|
|
// arm64:`SUB`,-`MSUBW`,-`MULW`
|
|
r2 := c - b*64
|
|
return r0, r1, r2
|
|
}
|