cmd/compile: prefer an add when shifting left by 1

ADD(Q|L) generally has twice the throughput of a left shift by 1 (SHL(Q|L) $1).

Came up in CL 626998.

Reciprocal throughput (cycles per instruction; lower is better) by arch:

Zen 4:

SHLL (R64, 1):   0.5
ADD  (R64, R64): 0.25

Intel Alder Lake:

SHLL (R64, 1):   0.5
ADD  (R64, R64): 0.2

Intel Haswell:

SHLL (R64, 1):   0.5
ADD  (R64, R64): 0.25

Also include a minor optimization for:

(x + x) << c -> x << (c + 1)

Before this, the code:

func addShift(x int64) int64 {
    return (x + x) << 1
}

emitted two instructions:

        ADDQ    AX, AX
        SHLQ    $1, AX

but we can do it in a single shift:

        SHLQ    $2, AX

Add a codegen test for clearing the last bit.

compilecmp linux/amd64:

math
math.sqrt 243 -> 242  (-0.41%)

math [cmd/compile]
math.sqrt 243 -> 242  (-0.41%)

runtime
runtime.selectgo 5455 -> 5445  (-0.18%)
runtime.sysargs 665 -> 662  (-0.45%)
runtime.isPinned 145 -> 141  (-2.76%)
runtime.atoi64 198 -> 194  (-2.02%)
runtime.setPinned 714 -> 709  (-0.70%)

runtime [cmd/compile]
runtime.sysargs 665 -> 662  (-0.45%)
runtime.setPinned 714 -> 709  (-0.70%)
runtime.atoi64 198 -> 194  (-2.02%)
runtime.isPinned 145 -> 141  (-2.76%)

strconv
strconv.computeBounds 109 -> 107  (-1.83%)
strconv.FormatInt 201 -> 197  (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266  (-2.47%)
strconv.small 144 -> 134  (-6.94%)
strconv.AppendInt 357 -> 344  (-3.64%)
strconv.ryuDigits32 490 -> 488  (-0.41%)
strconv.AppendUint 342 -> 340  (-0.58%)

strconv [cmd/compile]
strconv.FormatInt 201 -> 197  (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266  (-2.47%)
strconv.ryuDigits32 490 -> 488  (-0.41%)
strconv.AppendUint 342 -> 340  (-0.58%)
strconv.computeBounds 109 -> 107  (-1.83%)
strconv.small 144 -> 134  (-6.94%)
strconv.AppendInt 357 -> 344  (-3.64%)

image
image.Rectangle.Inset 101 -> 97  (-3.96%)

regexp/syntax
regexp/syntax.inCharClass.func1 111 -> 110  (-0.90%)
regexp/syntax.(*compiler).quest 586 -> 573  (-2.22%)
regexp/syntax.ranges.Less 153 -> 150  (-1.96%)
regexp/syntax.(*compiler).loop 583 -> 568  (-2.57%)

time
time.Time.Before 179 -> 161  (-10.06%)
time.Time.Compare 189 -> 166  (-12.17%)
time.Time.Sub 444 -> 425  (-4.28%)
time.Time.UnixMicro 106 -> 95  (-10.38%)
time.div 592 -> 587  (-0.84%)
time.Time.UnixNano 85 -> 78  (-8.24%)
time.(*Time).UnixMilli 141 -> 140  (-0.71%)
time.Time.UnixMilli 106 -> 95  (-10.38%)
time.(*Time).UnixMicro 141 -> 140  (-0.71%)
time.Time.After 179 -> 161  (-10.06%)
time.Time.Equal 170 -> 150  (-11.76%)
time.Time.AppendBinary 766 -> 757  (-1.17%)
time.Time.IsZero 74 -> 66  (-10.81%)
time.(*Time).UnixNano 124 -> 113  (-8.87%)
time.(*Time).IsZero 113 -> 108  (-4.42%)

regexp
regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569  (-3.56%)
regexp.QuoteMeta 485 -> 469  (-3.30%)

regexp/syntax [cmd/compile]
regexp/syntax.inCharClass.func1 111 -> 110  (-0.90%)
regexp/syntax.(*compiler).loop 583 -> 568  (-2.57%)
regexp/syntax.(*compiler).quest 586 -> 573  (-2.22%)
regexp/syntax.ranges.Less 153 -> 150  (-1.96%)

encoding/base64
encoding/base64.decodedLen 92 -> 90  (-2.17%)
encoding/base64.(*Encoding).DecodedLen 99 -> 97  (-2.02%)

time [cmd/compile]
time.(*Time).IsZero 113 -> 108  (-4.42%)
time.Time.IsZero 74 -> 66  (-10.81%)
time.(*Time).UnixNano 124 -> 113  (-8.87%)
time.Time.UnixMilli 106 -> 95  (-10.38%)
time.Time.Equal 170 -> 150  (-11.76%)
time.Time.UnixMicro 106 -> 95  (-10.38%)
time.(*Time).UnixMicro 141 -> 140  (-0.71%)
time.Time.Before 179 -> 161  (-10.06%)
time.Time.UnixNano 85 -> 78  (-8.24%)
time.Time.AppendBinary 766 -> 757  (-1.17%)
time.div 592 -> 587  (-0.84%)
time.Time.After 179 -> 161  (-10.06%)
time.Time.Compare 189 -> 166  (-12.17%)
time.(*Time).UnixMilli 141 -> 140  (-0.71%)
time.Time.Sub 444 -> 425  (-4.28%)

index/suffixarray
index/suffixarray.sais_8_32 1677 -> 1645  (-1.91%)
index/suffixarray.sais_32 1677 -> 1645  (-1.91%)
index/suffixarray.sais_64 1677 -> 1654  (-1.37%)
index/suffixarray.sais_8_64 1677 -> 1654  (-1.37%)
index/suffixarray.writeInt 249 -> 247  (-0.80%)

os
os.Expand 1070 -> 1051  (-1.78%)
os.Chtimes 787 -> 774  (-1.65%)

regexp [cmd/compile]
regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569  (-3.56%)
regexp.QuoteMeta 485 -> 469  (-3.30%)

encoding/base64 [cmd/compile]
encoding/base64.decodedLen 92 -> 90  (-2.17%)
encoding/base64.(*Encoding).DecodedLen 99 -> 97  (-2.02%)

encoding/hex
encoding/hex.Encode 138 -> 136  (-1.45%)
encoding/hex.(*decoder).Read 830 -> 824  (-0.72%)

crypto/des
crypto/des.initFeistelBox 235 -> 229  (-2.55%)
crypto/des.cryptBlock 549 -> 538  (-2.00%)

os [cmd/compile]
os.Chtimes 787 -> 774  (-1.65%)
os.Expand 1070 -> 1051  (-1.78%)

math/big
math/big.newFloat 238 -> 223  (-6.30%)
math/big.nat.mul 2138 -> 2122  (-0.75%)
math/big.karatsubaSqr 1372 -> 1369  (-0.22%)
math/big.(*Float).sqrtInverse 895 -> 878  (-1.90%)
math/big.basicSqr 1032 -> 1017  (-1.45%)

cmd/vendor/golang.org/x/sys/unix
cmd/vendor/golang.org/x/sys/unix.TimeToTimespec 72 -> 66  (-8.33%)

encoding/json
encoding/json.Indent 404 -> 403  (-0.25%)
encoding/json.MarshalIndent 303 -> 297  (-1.98%)

testing
testing.(*T).Deadline 84 -> 82  (-2.38%)
testing.(*M).Run 3545 -> 3525  (-0.56%)

archive/zip
archive/zip.headerFileInfo.ModTime 229 -> 223  (-2.62%)

encoding/gob
encoding/gob.(*encoderState).encodeInt 474 -> 469  (-1.05%)

crypto/elliptic
crypto/elliptic.Marshal 728 -> 714  (-1.92%)

debug/buildinfo
debug/buildinfo.readString 325 -> 315  (-3.08%)

image/png
image/png.(*decoder).readImagePass 10866 -> 10834  (-0.29%)

archive/tar
archive/tar.Header.allowedFormats.func3 1768 -> 1736  (-1.81%)
archive/tar.formatPAXTime 389 -> 358  (-7.97%)
archive/tar.(*Writer).writeGNUHeader 741 -> 727  (-1.89%)
archive/tar.readGNUSparseMap0x1 709 -> 695  (-1.97%)
archive/tar.(*Writer).templateV7Plus 915 -> 909  (-0.66%)

crypto/internal/cryptotest
crypto/internal/cryptotest.TestHash.func4 890 -> 879  (-1.24%)
crypto/internal/cryptotest.TestStream.func6.1 646 -> 645  (-0.15%)
crypto/internal/cryptotest.testCipher.func3 1300 -> 1289  (-0.85%)

internal/pkgbits
internal/pkgbits.(*Encoder).Int64 113 -> 103  (-8.85%)
internal/pkgbits.(*Encoder).rawVarint 74 -> 72  (-2.70%)

testing/quick
testing/quick.(*Config).getRand 316 -> 315  (-0.32%)

log/slog
log/slog.TimeValue 489 -> 479  (-2.04%)

runtime/pprof
runtime/pprof.(*profileBuilder).build 2341 -> 2322  (-0.81%)

internal/coverage/cfile
internal/coverage/cfile.(*emitState).openMetaFile 824 -> 822  (-0.24%)
internal/coverage/cfile.(*emitState).openCounterFile 904 -> 892  (-1.33%)

cmd/internal/objabi
cmd/internal/objabi.expandArgs 1177 -> 1169  (-0.68%)

crypto/ecdsa
crypto/ecdsa.pointFromAffine 1162 -> 1144  (-1.55%)

net
net.minNonzeroTime 313 -> 308  (-1.60%)
net.cgoLookupAddrPTR 812 -> 797  (-1.85%)
net.(*IPNet).String 851 -> 827  (-2.82%)
net.IP.AppendText 488 -> 471  (-3.48%)
net.IPMask.String 281 -> 270  (-3.91%)
net.partialDeadline 374 -> 366  (-2.14%)
net.hexString 249 -> 240  (-3.61%)
net.IP.String 454 -> 453  (-0.22%)

internal/fuzz
internal/fuzz.newPcgRand 240 -> 234  (-2.50%)

crypto/x509
crypto/x509.(*Certificate).isValid 2642 -> 2611  (-1.17%)

cmd/internal/obj/s390x
cmd/internal/obj/s390x.buildop 33676 -> 33644  (-0.10%)

encoding/hex [cmd/compile]
encoding/hex.(*decoder).Read 830 -> 824  (-0.72%)
encoding/hex.Encode 138 -> 136  (-1.45%)

cmd/internal/objabi [cmd/compile]
cmd/internal/objabi.expandArgs 1177 -> 1169  (-0.68%)

math/big [cmd/compile]
math/big.(*Float).sqrtInverse 895 -> 878  (-1.90%)
math/big.nat.mul 2138 -> 2122  (-0.75%)
math/big.karatsubaSqr 1372 -> 1369  (-0.22%)
math/big.basicSqr 1032 -> 1017  (-1.45%)
math/big.newFloat 238 -> 223  (-6.30%)

encoding/json [cmd/compile]
encoding/json.MarshalIndent 303 -> 297  (-1.98%)
encoding/json.Indent 404 -> 403  (-0.25%)

cmd/covdata
main.(*metaMerge).emitCounters 985 -> 973  (-1.22%)

runtime/pprof [cmd/compile]
runtime/pprof.(*profileBuilder).build 2341 -> 2322  (-0.81%)

cmd/compile/internal/syntax
cmd/compile/internal/syntax.(*source).fill 722 -> 703  (-2.63%)

cmd/dist
main.runInstall 19081 -> 19049  (-0.17%)

crypto/tls
crypto/tls.extractPadding 176 -> 175  (-0.57%)
slices.Clone[[]crypto/tls.SignatureScheme,crypto/tls.SignatureScheme] 253 -> 247  (-2.37%)
slices.Clone[[]uint16,uint16] 253 -> 247  (-2.37%)
slices.Clone[[]crypto/tls.CurveID,crypto/tls.CurveID] 253 -> 247  (-2.37%)
crypto/tls.(*Config).cipherSuites 335 -> 326  (-2.69%)
slices.DeleteFunc[go.shape.[]crypto/tls.CurveID,go.shape.uint16] 437 -> 434  (-0.69%)
crypto/tls.dial 1349 -> 1339  (-0.74%)
slices.DeleteFunc[go.shape.[]uint16,go.shape.uint16] 437 -> 434  (-0.69%)

internal/pkgbits [cmd/compile]
internal/pkgbits.(*Encoder).Int64 113 -> 103  (-8.85%)
internal/pkgbits.(*Encoder).rawVarint 74 -> 72  (-2.70%)

cmd/compile/internal/syntax [cmd/compile]
cmd/compile/internal/syntax.(*source).fill 722 -> 703  (-2.63%)

cmd/internal/obj/s390x [cmd/compile]
cmd/internal/obj/s390x.buildop 33676 -> 33644  (-0.10%)

cmd/go/internal/trace
cmd/go/internal/trace.Flow 910 -> 886  (-2.64%)
cmd/go/internal/trace.(*Span).Done 311 -> 304  (-2.25%)
cmd/go/internal/trace.StartSpan 620 -> 615  (-0.81%)

cmd/internal/script
cmd/internal/script.(*Engine).Execute.func2 534 -> 528  (-1.12%)

cmd/link/internal/loader
cmd/link/internal/loader.(*Loader).SetSymSect 344 -> 338  (-1.74%)

net/http
net/http.(*Transport).queueForIdleConn 1797 -> 1766  (-1.73%)
net/http.(*Transport).getConn 2149 -> 2131  (-0.84%)
net/http.(*http2ClientConn).tooIdleLocked 207 -> 197  (-4.83%)
net/http.(*http2responseWriter).SetWriteDeadline.func1 520 -> 508  (-2.31%)
net/http.(*Cookie).Valid 837 -> 818  (-2.27%)
net/http.(*http2responseWriter).SetReadDeadline 373 -> 357  (-4.29%)
net/http.checkIfRange 701 -> 690  (-1.57%)
net/http.(*http2SettingsFrame).Value 325 -> 298  (-8.31%)
net/http.(*http2SettingsFrame).HasDuplicates 777 -> 767  (-1.29%)
net/http.(*Server).Serve 1746 -> 1739  (-0.40%)
net/http.http2traceGotConn 569 -> 556  (-2.28%)

net/http/pprof
net/http/pprof.collectProfile 242 -> 239  (-1.24%)

cmd/compile/internal/coverage
cmd/compile/internal/coverage.metaHashAndLen 439 -> 438  (-0.23%)

cmd/vendor/golang.org/x/telemetry/internal/upload
cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).findWork 4570 -> 4540  (-0.66%)
cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).reports 3604 -> 3572  (-0.89%)

cmd/compile/internal/coverage [cmd/compile]
cmd/compile/internal/coverage.metaHashAndLen 439 -> 438  (-0.23%)

cmd/vendor/golang.org/x/text/language
cmd/vendor/golang.org/x/text/language.regionGroupDist 287 -> 284  (-1.05%)

cmd/go/internal/vcweb
cmd/go/internal/vcweb.(*Server).overview.func1 1045 -> 1041  (-0.38%)

cmd/go/internal/vcs
cmd/go/internal/vcs.expand 761 -> 741  (-2.63%)

cmd/compile/internal/inline/inlheur
slices.stableCmpFunc[go.shape.struct 2300 -> 2284  (-0.70%)

cmd/compile/internal/inline/inlheur [cmd/compile]
slices.stableCmpFunc[go.shape.struct 2300 -> 2284  (-0.70%)

cmd/go/internal/modfetch/codehost
cmd/go/internal/modfetch/codehost.bzrParseStat 2217 -> 2213  (-0.18%)

cmd/link/internal/ld
cmd/link/internal/ld.decodetypeStructFieldCount 157 -> 152  (-3.18%)
cmd/link/internal/ld.(*Link).address 12559 -> 12495  (-0.51%)
cmd/link/internal/ld.(*dodataState).allocateDataSections 18345 -> 18205  (-0.76%)
cmd/link/internal/ld.elfshreloc 618 -> 616  (-0.32%)
cmd/link/internal/ld.(*deadcodePass).decodetypeMethods 794 -> 779  (-1.89%)
cmd/link/internal/ld.(*dodataState).assignDsymsToSection 668 -> 663  (-0.75%)
cmd/link/internal/ld.relocSectFn 285 -> 284  (-0.35%)
cmd/link/internal/ld.decodetypeIfaceMethodCount 146 -> 144  (-1.37%)
cmd/link/internal/ld.decodetypeArrayLen 157 -> 152  (-3.18%)

cmd/link/internal/arm64
cmd/link/internal/arm64.gensymlate.func1 895 -> 888  (-0.78%)

cmd/go/internal/modload
cmd/go/internal/modload.queryProxy.func3 1029 -> 1012  (-1.65%)

cmd/go/internal/load
cmd/go/internal/load.(*Package).setBuildInfo 8453 -> 8447  (-0.07%)

cmd/go/internal/clean
cmd/go/internal/clean.runClean 2120 -> 2104  (-0.75%)

cmd/compile/internal/ssa
cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978  (-1.59%)
cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719  (-1.51%)
cmd/compile/internal/ssa.(*debugState).buildLocationLists 3326 -> 3294  (-0.96%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941  (-4.17%)
cmd/compile/internal/ssa.(*debugState).processValue 9756 -> 9724  (-0.33%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941  (-4.17%)
cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054  (-2.32%)

cmd/compile/internal/ssa [cmd/compile]
cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719  (-1.51%)
cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978  (-1.59%)
cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054  (-2.32%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941  (-4.17%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941  (-4.17%)

file                                                before   after    Δ       %
math/bits.s                                         2352     2354     +2      +0.085%
math/bits [cmd/compile].s                           2352     2354     +2      +0.085%
math.s                                              35675    35674    -1      -0.003%
math [cmd/compile].s                                35675    35674    -1      -0.003%
runtime.s                                           577251   577245   -6      -0.001%
runtime [cmd/compile].s                             642419   642438   +19     +0.003%
sort.s                                              37434    37435    +1      +0.003%
strconv.s                                           48391    48343    -48     -0.099%
sort [cmd/compile].s                                37434    37435    +1      +0.003%
bufio.s                                             21386    21418    +32     +0.150%
strconv [cmd/compile].s                             48391    48343    -48     -0.099%
image.s                                             34978    35022    +44     +0.126%
regexp/syntax.s                                     81719    81781    +62     +0.076%
time.s                                              94341    94184    -157    -0.166%
regexp.s                                            60411    60399    -12     -0.020%
bufio [cmd/compile].s                               21512    21544    +32     +0.149%
encoding/binary.s                                   34062    34087    +25     +0.073%
regexp/syntax [cmd/compile].s                       81719    81781    +62     +0.076%
encoding/base64.s                                   11907    11903    -4      -0.034%
time [cmd/compile].s                                94341    94184    -157    -0.166%
index/suffixarray.s                                 41633    41527    -106    -0.255%
os.s                                                101770   101738   -32     -0.031%
regexp [cmd/compile].s                              60411    60399    -12     -0.020%
encoding/binary [cmd/compile].s                     37173    37198    +25     +0.067%
encoding/base64 [cmd/compile].s                     11907    11903    -4      -0.034%
os/exec.s                                           23900    23907    +7      +0.029%
encoding/hex.s                                      6038     6030     -8      -0.132%
crypto/des.s                                        5073     5056     -17     -0.335%
os [cmd/compile].s                                  102030   101998   -32     -0.031%
vendor/golang.org/x/net/http2/hpack.s               22027    22033    +6      +0.027%
math/big.s                                          164808   164753   -55     -0.033%
cmd/vendor/golang.org/x/sys/unix.s                  121450   121444   -6      -0.005%
encoding/json.s                                     110294   110287   -7      -0.006%
testing.s                                           115303   115281   -22     -0.019%
archive/zip.s                                       65329    65325    -4      -0.006%
os/user.s                                           10078    10080    +2      +0.020%
encoding/gob.s                                      143788   143783   -5      -0.003%
crypto/elliptic.s                                   30686    30704    +18     +0.059%
go/doc/comment.s                                    49401    49433    +32     +0.065%
debug/buildinfo.s                                   9095     9085     -10     -0.110%
image/png.s                                         36113    36081    -32     -0.089%
archive/tar.s                                       71994    71897    -97     -0.135%
crypto/internal/cryptotest.s                        60872    60849    -23     -0.038%
internal/pkgbits.s                                  20441    20429    -12     -0.059%
testing/quick.s                                     8236     8235     -1      -0.012%
log/slog.s                                          77568    77558    -10     -0.013%
internal/trace/internal/oldtrace.s                  52885    52896    +11     +0.021%
runtime/pprof.s                                     123978   123969   -9      -0.007%
internal/coverage/cfile.s                           25198    25184    -14     -0.056%
cmd/internal/objabi.s                               19954    19946    -8      -0.040%
crypto/ecdsa.s                                      29159    29141    -18     -0.062%
log/slog/internal/benchmarks.s                      6694     6695     +1      +0.015%
net.s                                               299569   299503   -66     -0.022%
os/exec [cmd/compile].s                             23888    23895    +7      +0.029%
internal/trace.s                                    179226   179240   +14     +0.008%
internal/fuzz.s                                     86190    86191    +1      +0.001%
crypto/x509.s                                       177195   177164   -31     -0.017%
cmd/internal/obj/s390x.s                            121642   121610   -32     -0.026%
cmd/internal/obj/ppc64.s                            140118   140122   +4      +0.003%
encoding/hex [cmd/compile].s                        6149     6141     -8      -0.130%
cmd/internal/objabi [cmd/compile].s                 19954    19946    -8      -0.040%
cmd/internal/obj/arm64.s                            158523   158555   +32     +0.020%
go/doc/comment [cmd/compile].s                      49512    49544    +32     +0.065%
math/big [cmd/compile].s                            166394   166339   -55     -0.033%
encoding/json [cmd/compile].s                       110712   110705   -7      -0.006%
cmd/covdata.s                                       39699    39687    -12     -0.030%
runtime/pprof [cmd/compile].s                       125209   125200   -9      -0.007%
cmd/compile/internal/syntax.s                       181755   181736   -19     -0.010%
cmd/dist.s                                          177893   177861   -32     -0.018%
crypto/tls.s                                        389157   389113   -44     -0.011%
internal/pkgbits [cmd/compile].s                    41644    41632    -12     -0.029%
cmd/compile/internal/syntax [cmd/compile].s         196105   196086   -19     -0.010%
cmd/compile/internal/types.s                        71315    71345    +30     +0.042%
cmd/internal/obj/s390x [cmd/compile].s              121733   121701   -32     -0.026%
cmd/go/internal/trace.s                             4796     4760     -36     -0.751%
cmd/internal/obj/arm64 [cmd/compile].s              168120   168147   +27     +0.016%
cmd/internal/obj/ppc64 [cmd/compile].s              140219   140223   +4      +0.003%
cmd/internal/script.s                               83442    83436    -6      -0.007%
cmd/link/internal/loader.s                          93299    93294    -5      -0.005%
net/http.s                                          620639   620472   -167    -0.027%
net/http/pprof.s                                    35016    35013    -3      -0.009%
cmd/compile/internal/coverage.s                     6668     6667     -1      -0.015%
cmd/vendor/golang.org/x/telemetry/internal/upload.s 34210    34148    -62     -0.181%
cmd/compile/internal/coverage [cmd/compile].s       6664     6663     -1      -0.015%
cmd/vendor/golang.org/x/text/language.s             48077    48074    -3      -0.006%
cmd/go/internal/vcweb.s                             45193    45189    -4      -0.009%
cmd/go/internal/vcs.s                               44749    44729    -20     -0.045%
cmd/compile/internal/inline/inlheur.s               83758    83742    -16     -0.019%
cmd/compile/internal/inline/inlheur [cmd/compile].s 84773    84757    -16     -0.019%
cmd/go/internal/modfetch/codehost.s                 89098    89094    -4      -0.004%
cmd/trace.s                                         257550   257564   +14     +0.005%
cmd/link/internal/ld.s                              641945   641706   -239    -0.037%
cmd/link/internal/arm64.s                           34805    34798    -7      -0.020%
cmd/go/internal/modload.s                           328971   328954   -17     -0.005%
cmd/go/internal/load.s                              178877   178871   -6      -0.003%
cmd/go/internal/clean.s                             11006    10990    -16     -0.145%
cmd/compile/internal/ssa.s                          3552843  3553347  +504    +0.014%
cmd/compile/internal/ssa [cmd/compile].s            3752511  3753123  +612    +0.016%
total                                               36179015 36178687 -328    -0.001%

Change-Id: I251c2898ccf3c9931d162d87dabbd49cf4ec73a5
Reviewed-on: https://go-review.googlesource.com/c/go/+/641757
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Jakub Ciolek 2025-01-11 19:26:57 +01:00 committed by Gopher Robot
parent a8e532b0f2
commit cd595be6d6
5 changed files with 302 additions and 92 deletions

View File

@ -664,9 +664,11 @@
// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
(BT(Q|L)const [c] (ADDQ x x)) && c>1 => (BT(Q|L)const [c-1] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
(BTLconst [c] (ADDL x x)) && c>1 => (BTLconst [c-1] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
@ -702,11 +704,11 @@
// We thus special-case them, by detecting the shift patterns.
// Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
(ADD(L|Q) (SHR(L|Q)const [1] x) (SHR(L|Q)const [1] x))
=> (AND(L|Q)const [-2] x)
(SHRLconst [1] (SHLLconst [1] x))
(SHRLconst [1] (ADDL x x))
=> (ANDLconst [0x7fffffff] x)
(SHRQconst [1] (SHLQconst [1] x))
(SHRQconst [1] (ADDQ x x))
=> (BTRQconst [63] x)
// Special case testing first/last bit (with double-shift generated by generic.rules)
@ -933,17 +935,19 @@
(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 <v.Type> x x))
// Prefer addition when shifting left by one
(SHL(Q|L)const [1] x) => (ADD(Q|L) x x)
// combine add/shift into LEAQ/LEAL
(ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y)
(ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y)
(ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) y y)) => (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) x y)) => (LEA(L|Q)2 y x)
// combine ADDQ/ADDQconst into LEAQ1/LEAL1
(ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y)
(ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y)
(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x)
(ADD(Q|L)const [c] (ADD(Q|L) x x)) => (LEA(Q|L)1 [c] x x)
// fold ADDQ/ADDL into LEAQ/LEAL
(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
@ -965,12 +969,18 @@
(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y)
// fold shifts into LEAQx/LEALx
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)2 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)4 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)8 [c] {s} x y)
// (x + x) << 1 -> x << 2
(LEA(Q|L)2 [0] {s} (ADD(Q|L) x x) x) && s == nil => (SHL(Q|L)const [2] x)
// (x + x) << 2 -> x << 3 and similar
(SHL(Q|L)const [c] (ADD(Q|L) x x)) => (SHL(Q|L)const [c+1] x)
// reverse ordering of compare instruction
(SETL (InvertFlags x)) => (SETG x)

View File

@ -1261,6 +1261,21 @@ func rewriteValueAMD64_OpAMD64ADCQconst(v *Value) bool {
func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (ADDL (SHRLconst [1] x) (SHRLconst [1] x))
// result: (ANDLconst [-2] x)
for {
if v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] {
break
}
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x)
return true
}
// match: (ADDL x (MOVLconst [c]))
// result: (ADDLconst [c] x)
for {
@ -1307,21 +1322,6 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
}
break
}
// match: (ADDL x (SHLLconst [1] y))
// result: (LEAL2 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
continue
}
y := v_1.Args[0]
v.reset(OpAMD64LEAL2)
v.AddArg2(x, y)
return true
}
break
}
// match: (ADDL x (ADDL y y))
// result: (LEAL2 x y)
for {
@ -1461,14 +1461,17 @@ func rewriteValueAMD64_OpAMD64ADDLconst(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (ADDLconst [c] (SHLLconst [1] x))
// match: (ADDLconst [c] (ADDL x x))
// result: (LEAL1 [c] x x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
if v_0.Op != OpAMD64ADDL {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] {
break
}
x := v_0.Args[0]
v.reset(OpAMD64LEAL1)
v.AuxInt = int32ToAuxInt(c)
v.AddArg2(x, x)
@ -1806,6 +1809,21 @@ func rewriteValueAMD64_OpAMD64ADDLmodify(v *Value) bool {
func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (ADDQ (SHRQconst [1] x) (SHRQconst [1] x))
// result: (ANDQconst [-2] x)
for {
if v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] {
break
}
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x)
return true
}
// match: (ADDQ x (MOVQconst <t> [c]))
// cond: is32Bit(c) && !t.IsPtr()
// result: (ADDQconst [int32(c)] x)
@ -1873,21 +1891,6 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
}
break
}
// match: (ADDQ x (SHLQconst [1] y))
// result: (LEAQ2 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
continue
}
y := v_1.Args[0]
v.reset(OpAMD64LEAQ2)
v.AddArg2(x, y)
return true
}
break
}
// match: (ADDQ x (ADDQ y y))
// result: (LEAQ2 x y)
for {
@ -2052,14 +2055,17 @@ func rewriteValueAMD64_OpAMD64ADDQconst(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (ADDQconst [c] (SHLQconst [1] x))
// match: (ADDQconst [c] (ADDQ x x))
// result: (LEAQ1 [c] x x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
if v_0.Op != OpAMD64ADDQ {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] {
break
}
x := v_0.Args[0]
v.reset(OpAMD64LEAQ1)
v.AuxInt = int32ToAuxInt(c)
v.AddArg2(x, x)
@ -3637,6 +3643,23 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (BTLconst [c] (ADDQ x x))
// cond: c>1
// result: (BTLconst [c-1] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ADDQ {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] || !(c > 1) {
break
}
v.reset(OpAMD64BTLconst)
v.AuxInt = int8ToAuxInt(c - 1)
v.AddArg(x)
return true
}
// match: (BTLconst [c] (SHLQconst [d] x))
// cond: c>d
// result: (BTLconst [c-d] x)
@ -3689,6 +3712,23 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (BTLconst [c] (ADDL x x))
// cond: c>1
// result: (BTLconst [c-1] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ADDL {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] || !(c > 1) {
break
}
v.reset(OpAMD64BTLconst)
v.AuxInt = int8ToAuxInt(c - 1)
v.AddArg(x)
return true
}
// match: (BTLconst [c] (SHLLconst [d] x))
// cond: c>d
// result: (BTLconst [c-d] x)
@ -3761,6 +3801,23 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool {
v.AddArg(x)
return true
}
// match: (BTQconst [c] (ADDQ x x))
// cond: c>1
// result: (BTQconst [c-1] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ADDQ {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] || !(c > 1) {
break
}
v.reset(OpAMD64BTQconst)
v.AuxInt = int8ToAuxInt(c - 1)
v.AddArg(x)
return true
}
// match: (BTQconst [c] (SHLQconst [d] x))
// cond: c>d
// result: (BTQconst [c-d] x)
@ -8287,17 +8344,20 @@ func rewriteValueAMD64_OpAMD64LEAL1(v *Value) bool {
}
break
}
// match: (LEAL1 [c] {s} x (SHLLconst [1] y))
// match: (LEAL1 [c] {s} x (ADDL y y))
// result: (LEAL2 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
if v_1.Op != OpAMD64ADDL {
continue
}
y := v_1.Args[1]
if y != v_1.Args[0] {
continue
}
y := v_1.Args[0]
v.reset(OpAMD64LEAL2)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
@ -8391,16 +8451,19 @@ func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (LEAL2 [c] {s} x (SHLLconst [1] y))
// match: (LEAL2 [c] {s} x (ADDL y y))
// result: (LEAL4 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
if v_1.Op != OpAMD64ADDL {
break
}
y := v_1.Args[1]
if y != v_1.Args[0] {
break
}
y := v_1.Args[0]
v.reset(OpAMD64LEAL4)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
@ -8423,6 +8486,26 @@ func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (LEAL2 [0] {s} (ADDL x x) x)
// cond: s == nil
// result: (SHLLconst [2] x)
for {
if auxIntToInt32(v.AuxInt) != 0 {
break
}
s := auxToSym(v.Aux)
if v_0.Op != OpAMD64ADDL {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] || x != v_1 || !(s == nil) {
break
}
v.reset(OpAMD64SHLLconst)
v.AuxInt = int8ToAuxInt(2)
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool {
@ -8470,16 +8553,19 @@ func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (LEAL4 [c] {s} x (SHLLconst [1] y))
// match: (LEAL4 [c] {s} x (ADDL y y))
// result: (LEAL8 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
if v_1.Op != OpAMD64ADDL {
break
}
y := v_1.Args[1]
if y != v_1.Args[0] {
break
}
y := v_1.Args[0]
v.reset(OpAMD64LEAL8)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
@ -8721,17 +8807,20 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
}
break
}
// match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
// match: (LEAQ1 [c] {s} x (ADDQ y y))
// result: (LEAQ2 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
if v_1.Op != OpAMD64ADDQ {
continue
}
y := v_1.Args[1]
if y != v_1.Args[0] {
continue
}
y := v_1.Args[0]
v.reset(OpAMD64LEAQ2)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
@ -8924,16 +9013,19 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
// match: (LEAQ2 [c] {s} x (ADDQ y y))
// result: (LEAQ4 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
if v_1.Op != OpAMD64ADDQ {
break
}
y := v_1.Args[1]
if y != v_1.Args[0] {
break
}
y := v_1.Args[0]
v.reset(OpAMD64LEAQ4)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
@ -8956,6 +9048,26 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (LEAQ2 [0] {s} (ADDQ x x) x)
// cond: s == nil
// result: (SHLQconst [2] x)
for {
if auxIntToInt32(v.AuxInt) != 0 {
break
}
s := auxToSym(v.Aux)
if v_0.Op != OpAMD64ADDQ {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] || x != v_1 || !(s == nil) {
break
}
v.reset(OpAMD64SHLQconst)
v.AuxInt = int8ToAuxInt(2)
v.AddArg(x)
return true
}
// match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB
// result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
@ -9087,16 +9199,19 @@ func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool {
v.AddArg2(x, y)
return true
}
// match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
// match: (LEAQ4 [c] {s} x (ADDQ y y))
// result: (LEAQ8 [c] {s} x y)
for {
c := auxIntToInt32(v.AuxInt)
s := auxToSym(v.Aux)
x := v_0
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
if v_1.Op != OpAMD64ADDQ {
break
}
y := v_1.Args[1]
if y != v_1.Args[0] {
break
}
y := v_1.Args[0]
v.reset(OpAMD64LEAQ8)
v.AuxInt = int32ToAuxInt(c)
v.Aux = symToAux(s)
@ -20736,18 +20851,6 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SHLLconst [1] (SHRLconst [1] x))
// result: (ANDLconst [-2] x)
for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x)
return true
}
// match: (SHLLconst x [0])
// result: x
for {
@ -20758,6 +20861,33 @@ func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
v.copyOf(x)
return true
}
// match: (SHLLconst [1] x)
// result: (ADDL x x)
for {
if auxIntToInt8(v.AuxInt) != 1 {
break
}
x := v_0
v.reset(OpAMD64ADDL)
v.AddArg2(x, x)
return true
}
// match: (SHLLconst [c] (ADDL x x))
// result: (SHLLconst [c+1] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ADDL {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] {
break
}
v.reset(OpAMD64SHLLconst)
v.AuxInt = int8ToAuxInt(c + 1)
v.AddArg(x)
return true
}
// match: (SHLLconst [d] (MOVLconst [c]))
// result: (MOVLconst [c << uint64(d)])
for {
@ -20992,18 +21122,6 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SHLQconst [1] (SHRQconst [1] x))
// result: (ANDQconst [-2] x)
for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
v.reset(OpAMD64ANDQconst)
v.AuxInt = int32ToAuxInt(-2)
v.AddArg(x)
return true
}
// match: (SHLQconst x [0])
// result: x
for {
@ -21014,6 +21132,33 @@ func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
v.copyOf(x)
return true
}
// match: (SHLQconst [1] x)
// result: (ADDQ x x)
for {
if auxIntToInt8(v.AuxInt) != 1 {
break
}
x := v_0
v.reset(OpAMD64ADDQ)
v.AddArg2(x, x)
return true
}
// match: (SHLQconst [c] (ADDQ x x))
// result: (SHLQconst [c+1] x)
for {
c := auxIntToInt8(v.AuxInt)
if v_0.Op != OpAMD64ADDQ {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] {
break
}
v.reset(OpAMD64SHLQconst)
v.AuxInt = int8ToAuxInt(c + 1)
v.AddArg(x)
return true
}
// match: (SHLQconst [d] (MOVQconst [c]))
// result: (MOVQconst [c << uint64(d)])
for {
@ -21419,13 +21564,16 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SHRLconst [1] (SHLLconst [1] x))
// match: (SHRLconst [1] (ADDL x x))
// result: (ANDLconst [0x7fffffff] x)
for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDL {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] {
break
}
x := v_0.Args[0]
v.reset(OpAMD64ANDLconst)
v.AuxInt = int32ToAuxInt(0x7fffffff)
v.AddArg(x)
@ -21663,13 +21811,16 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
}
func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SHRQconst [1] (SHLQconst [1] x))
// match: (SHRQconst [1] (ADDQ x x))
// result: (BTRQconst [63] x)
for {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDQ {
break
}
x := v_0.Args[1]
if x != v_0.Args[0] {
break
}
x := v_0.Args[0]
v.reset(OpAMD64BTRQconst)
v.AuxInt = int8ToAuxInt(63)
v.AddArg(x)

View File

@ -185,6 +185,15 @@ func Pow2Muls(n1, n2 int) (int, int) {
return a, b
}
// Mul_2 multiplies a 32-bit and a 64-bit operand by the constant 2 and
// returns both products. The directive comments in the body state the
// expected amd64 code for the statement that follows each of them: the
// doubling should show up as ADDL / ADDQ, and SHLL / SHLQ must not appear.
func Mul_2(n1 int32, n2 int64) (int32, int64) {
// amd64:"ADDL", -"SHLL"
a := n1 * 2
// amd64:"ADDQ", -"SHLQ"
b := n2 * 2
return a, b
}
func Mul_96(n int) int {
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`

View File

@ -120,6 +120,16 @@ func bitoff64(a, b uint64) (n uint64) {
return n
}
// clearLastBit shifts each argument right by one and then left by one, which
// zeroes the lowest bit, and returns both results. The directive comments
// state the expected amd64 code for each statement: an AND with the
// immediate -2 (ANDQ for the 64-bit value, ANDL for the 32-bit value).
func clearLastBit(x int64, y int32) (int64, int32) {
// amd64:"ANDQ\t[$]-2"
a := (x >> 1) << 1
// amd64:"ANDL\t[$]-2"
b := (y >> 1) << 1
return a, b
}
func bitcompl64(a, b uint64) (n uint64) {
// amd64:"BTCQ"
n += b ^ (1 << (a & 63))

View File

@ -58,6 +58,16 @@ func rshConst64x64Overflow8(v int8) int64 {
return int64(v) >> 8
}
// lshConst32x1 returns v shifted left by the constant 1. The directive
// comment expects amd64 to emit ADDL for this and not SHLL.
func lshConst32x1(v int32) int32 {
// amd64:"ADDL", -"SHLL"
return v << 1
}
// lshConst64x1 returns v shifted left by the constant 1. The directive
// comment expects amd64 to emit ADDQ for this and not SHLQ.
func lshConst64x1(v int64) int64 {
// amd64:"ADDQ", -"SHLQ"
return v << 1
}
func lshConst32x64(v int32) int32 {
// ppc64x:"SLW"
// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
@ -94,6 +104,26 @@ func rshConst64x32(v int64) int64 {
return v >> uint32(33)
}
// lshConst32x1Add returns (x + x) << 1 for a 32-bit value. The directive
// comment expects amd64 to fold the add into the shift, i.e. a SHLL with
// the immediate 2.
func lshConst32x1Add(x int32) int32 {
// amd64:"SHLL\t[$]2"
return (x + x) << 1
}
// lshConst64x1Add returns (x + x) << 1 for a 64-bit value. The directive
// comment expects amd64 to fold the add into the shift, i.e. a SHLQ with
// the immediate 2.
func lshConst64x1Add(x int64) int64 {
// amd64:"SHLQ\t[$]2"
return (x + x) << 1
}
// lshConst32x2Add returns (x + x) << 2 for a 32-bit value. The directive
// comment expects amd64 to fold the add into the shift, i.e. a SHLL with
// the immediate 3.
func lshConst32x2Add(x int32) int32 {
// amd64:"SHLL\t[$]3"
return (x + x) << 2
}
// lshConst64x2Add returns (x + x) << 2 for a 64-bit value. The directive
// comment expects amd64 to fold the add into the shift, i.e. a SHLQ with
// the immediate 3.
func lshConst64x2Add(x int64) int64 {
// amd64:"SHLQ\t[$]3"
return (x + x) << 2
}
// ------------------ //
// masked shifts //
// ------------------ //