mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
ADD(Q|L) has generally twice the throughput. Came up in CL 626998. Throughput by arch: Zen 4: SHLL (R64, 1): 0.5 ADD (R64, R64): 0.25 Intel Alder Lake: SHLL (R64, 1): 0.5 ADD (R64, R64): 0.2 Intel Haswell: SHLL (R64, 1): 0.5 ADD (R64, R64): 0.25 Also include a minor opt for: (x + x) << c -> x << (c + 1) Before this, the code: func addShift(x int64) int64 { return (x + x) << 1 } emitted two instructions: ADDQ AX, AX SHLQ $1, AX but we can do it in a single shift: SHLQ $2, AX Add a codegen test for clearing the last bit. compilecmp linux/amd64: math math.sqrt 243 -> 242 (-0.41%) math [cmd/compile] math.sqrt 243 -> 242 (-0.41%) runtime runtime.selectgo 5455 -> 5445 (-0.18%) runtime.sysargs 665 -> 662 (-0.45%) runtime.isPinned 145 -> 141 (-2.76%) runtime.atoi64 198 -> 194 (-2.02%) runtime.setPinned 714 -> 709 (-0.70%) runtime [cmd/compile] runtime.sysargs 665 -> 662 (-0.45%) runtime.setPinned 714 -> 709 (-0.70%) runtime.atoi64 198 -> 194 (-2.02%) runtime.isPinned 145 -> 141 (-2.76%) strconv strconv.computeBounds 109 -> 107 (-1.83%) strconv.FormatInt 201 -> 197 (-1.99%) strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%) strconv.small 144 -> 134 (-6.94%) strconv.AppendInt 357 -> 344 (-3.64%) strconv.ryuDigits32 490 -> 488 (-0.41%) strconv.AppendUint 342 -> 340 (-0.58%) strconv [cmd/compile] strconv.FormatInt 201 -> 197 (-1.99%) strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%) strconv.ryuDigits32 490 -> 488 (-0.41%) strconv.AppendUint 342 -> 340 (-0.58%) strconv.computeBounds 109 -> 107 (-1.83%) strconv.small 144 -> 134 (-6.94%) strconv.AppendInt 357 -> 344 (-3.64%) image image.Rectangle.Inset 101 -> 97 (-3.96%) regexp/syntax regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%) regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%) regexp/syntax.ranges.Less 153 -> 150 (-1.96%) regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%) time time.Time.Before 179 -> 161 (-10.06%) time.Time.Compare 189 -> 166 (-12.17%) time.Time.Sub 444 -> 425 (-4.28%) time.Time.UnixMicro 106 -> 95 (-10.38%) time.div 592 -> 587 (-0.84%) time.Time.UnixNano 85 -> 78 (-8.24%) time.(*Time).UnixMilli 141 -> 140 (-0.71%) time.Time.UnixMilli 106 -> 95 (-10.38%) time.(*Time).UnixMicro 141 -> 140 (-0.71%) time.Time.After 179 -> 161 (-10.06%) time.Time.Equal 170 -> 150 (-11.76%) time.Time.AppendBinary 766 -> 757 (-1.17%) time.Time.IsZero 74 -> 66 (-10.81%) time.(*Time).UnixNano 124 -> 113 (-8.87%) time.(*Time).IsZero 113 -> 108 (-4.42%) regexp regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%) regexp.QuoteMeta 485 -> 469 (-3.30%) regexp/syntax [cmd/compile] regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%) regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%) regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%) regexp/syntax.ranges.Less 153 -> 150 (-1.96%) encoding/base64 encoding/base64.decodedLen 92 -> 90 (-2.17%) encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%) time [cmd/compile] time.(*Time).IsZero 113 -> 108 (-4.42%) time.Time.IsZero 74 -> 66 (-10.81%) time.(*Time).UnixNano 124 -> 113 (-8.87%) time.Time.UnixMilli 106 -> 95 (-10.38%) time.Time.Equal 170 -> 150 (-11.76%) time.Time.UnixMicro 106 -> 95 (-10.38%) time.(*Time).UnixMicro 141 -> 140 (-0.71%) time.Time.Before 179 -> 161 (-10.06%) time.Time.UnixNano 85 -> 78 (-8.24%) time.Time.AppendBinary 766 -> 757 (-1.17%) time.div 592 -> 587 (-0.84%) time.Time.After 179 -> 161 (-10.06%) time.Time.Compare 189 -> 166 (-12.17%) time.(*Time).UnixMilli 141 -> 140 (-0.71%) time.Time.Sub 444 -> 425 (-4.28%) index/suffixarray index/suffixarray.sais_8_32 1677 -> 1645 (-1.91%) index/suffixarray.sais_32 1677 -> 1645 (-1.91%) index/suffixarray.sais_64 1677 -> 1654 (-1.37%) index/suffixarray.sais_8_64 1677 -> 1654 (-1.37%) index/suffixarray.writeInt 249 -> 247 (-0.80%) os os.Expand 1070 -> 1051 (-1.78%) os.Chtimes 787 -> 774 (-1.65%) regexp [cmd/compile] regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%) regexp.QuoteMeta 485 -> 469 (-3.30%) encoding/base64 [cmd/compile] encoding/base64.decodedLen 92 -> 90 (-2.17%) encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%) encoding/hex encoding/hex.Encode 138 -> 136 (-1.45%) encoding/hex.(*decoder).Read 830 -> 824 (-0.72%) crypto/des crypto/des.initFeistelBox 235 -> 229 (-2.55%) crypto/des.cryptBlock 549 -> 538 (-2.00%) os [cmd/compile] os.Chtimes 787 -> 774 (-1.65%) os.Expand 1070 -> 1051 (-1.78%) math/big math/big.newFloat 238 -> 223 (-6.30%) math/big.nat.mul 2138 -> 2122 (-0.75%) math/big.karatsubaSqr 1372 -> 1369 (-0.22%) math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%) math/big.basicSqr 1032 -> 1017 (-1.45%) cmd/vendor/golang.org/x/sys/unix cmd/vendor/golang.org/x/sys/unix.TimeToTimespec 72 -> 66 (-8.33%) encoding/json encoding/json.Indent 404 -> 403 (-0.25%) encoding/json.MarshalIndent 303 -> 297 (-1.98%) testing testing.(*T).Deadline 84 -> 82 (-2.38%) testing.(*M).Run 3545 -> 3525 (-0.56%) archive/zip archive/zip.headerFileInfo.ModTime 229 -> 223 (-2.62%) encoding/gob encoding/gob.(*encoderState).encodeInt 474 -> 469 (-1.05%) crypto/elliptic crypto/elliptic.Marshal 728 -> 714 (-1.92%) debug/buildinfo debug/buildinfo.readString 325 -> 315 (-3.08%) image/png image/png.(*decoder).readImagePass 10866 -> 10834 (-0.29%) archive/tar archive/tar.Header.allowedFormats.func3 1768 -> 1736 (-1.81%) archive/tar.formatPAXTime 389 -> 358 (-7.97%) archive/tar.(*Writer).writeGNUHeader 741 -> 727 (-1.89%) archive/tar.readGNUSparseMap0x1 709 -> 695 (-1.97%) archive/tar.(*Writer).templateV7Plus 915 -> 909 (-0.66%) crypto/internal/cryptotest crypto/internal/cryptotest.TestHash.func4 890 -> 879 (-1.24%) crypto/internal/cryptotest.TestStream.func6.1 646 -> 645 (-0.15%) crypto/internal/cryptotest.testCipher.func3 1300 -> 1289 (-0.85%) internal/pkgbits internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%) internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%) testing/quick testing/quick.(*Config).getRand 316 -> 315 (-0.32%) log/slog log/slog.TimeValue 489 -> 479 (-2.04%) runtime/pprof runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%) internal/coverage/cfile internal/coverage/cfile.(*emitState).openMetaFile 824 -> 822 (-0.24%) internal/coverage/cfile.(*emitState).openCounterFile 904 -> 892 (-1.33%) cmd/internal/objabi cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%) crypto/ecdsa crypto/ecdsa.pointFromAffine 1162 -> 1144 (-1.55%) net net.minNonzeroTime 313 -> 308 (-1.60%) net.cgoLookupAddrPTR 812 -> 797 (-1.85%) net.(*IPNet).String 851 -> 827 (-2.82%) net.IP.AppendText 488 -> 471 (-3.48%) net.IPMask.String 281 -> 270 (-3.91%) net.partialDeadline 374 -> 366 (-2.14%) net.hexString 249 -> 240 (-3.61%) net.IP.String 454 -> 453 (-0.22%) internal/fuzz internal/fuzz.newPcgRand 240 -> 234 (-2.50%) crypto/x509 crypto/x509.(*Certificate).isValid 2642 -> 2611 (-1.17%) cmd/internal/obj/s390x cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%) encoding/hex [cmd/compile] encoding/hex.(*decoder).Read 830 -> 824 (-0.72%) encoding/hex.Encode 138 -> 136 (-1.45%) cmd/internal/objabi [cmd/compile] cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%) math/big [cmd/compile] math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%) math/big.nat.mul 2138 -> 2122 (-0.75%) math/big.karatsubaSqr 1372 -> 1369 (-0.22%) math/big.basicSqr 1032 -> 1017 (-1.45%) math/big.newFloat 238 -> 223 (-6.30%) encoding/json [cmd/compile] encoding/json.MarshalIndent 303 -> 297 (-1.98%) encoding/json.Indent 404 -> 403 (-0.25%) cmd/covdata main.(*metaMerge).emitCounters 985 -> 973 (-1.22%) runtime/pprof [cmd/compile] runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%) cmd/compile/internal/syntax cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%) cmd/dist main.runInstall 19081 -> 19049 (-0.17%) crypto/tls crypto/tls.extractPadding 176 -> 175 (-0.57%) slices.Clone[[]crypto/tls.SignatureScheme,crypto/tls.SignatureScheme] 253 -> 247 (-2.37%) slices.Clone[[]uint16,uint16] 253 -> 247 (-2.37%) slices.Clone[[]crypto/tls.CurveID,crypto/tls.CurveID] 253 -> 247 (-2.37%) crypto/tls.(*Config).cipherSuites 335 -> 326 (-2.69%) slices.DeleteFunc[go.shape.[]crypto/tls.CurveID,go.shape.uint16] 437 -> 434 (-0.69%) crypto/tls.dial 1349 -> 1339 (-0.74%) slices.DeleteFunc[go.shape.[]uint16,go.shape.uint16] 437 -> 434 (-0.69%) internal/pkgbits [cmd/compile] internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%) internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%) cmd/compile/internal/syntax [cmd/compile] cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%) cmd/internal/obj/s390x [cmd/compile] cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%) cmd/go/internal/trace cmd/go/internal/trace.Flow 910 -> 886 (-2.64%) cmd/go/internal/trace.(*Span).Done 311 -> 304 (-2.25%) cmd/go/internal/trace.StartSpan 620 -> 615 (-0.81%) cmd/internal/script cmd/internal/script.(*Engine).Execute.func2 534 -> 528 (-1.12%) cmd/link/internal/loader cmd/link/internal/loader.(*Loader).SetSymSect 344 -> 338 (-1.74%) net/http net/http.(*Transport).queueForIdleConn 1797 -> 1766 (-1.73%) net/http.(*Transport).getConn 2149 -> 2131 (-0.84%) net/http.(*http2ClientConn).tooIdleLocked 207 -> 197 (-4.83%) net/http.(*http2responseWriter).SetWriteDeadline.func1 520 -> 508 (-2.31%) net/http.(*Cookie).Valid 837 -> 818 (-2.27%) net/http.(*http2responseWriter).SetReadDeadline 373 -> 357 (-4.29%) net/http.checkIfRange 701 -> 690 (-1.57%) net/http.(*http2SettingsFrame).Value 325 -> 298 (-8.31%) net/http.(*http2SettingsFrame).HasDuplicates 777 -> 767 (-1.29%) net/http.(*Server).Serve 1746 -> 1739 (-0.40%) net/http.http2traceGotConn 569 -> 556 (-2.28%) net/http/pprof net/http/pprof.collectProfile 242 -> 239 (-1.24%) cmd/compile/internal/coverage cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%) cmd/vendor/golang.org/x/telemetry/internal/upload cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).findWork 4570 -> 4540 (-0.66%) cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).reports 3604 -> 3572 (-0.89%) cmd/compile/internal/coverage [cmd/compile] cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%) cmd/vendor/golang.org/x/text/language cmd/vendor/golang.org/x/text/language.regionGroupDist 287 -> 284 (-1.05%) cmd/go/internal/vcweb cmd/go/internal/vcweb.(*Server).overview.func1 1045 -> 1041 (-0.38%) cmd/go/internal/vcs cmd/go/internal/vcs.expand 761 -> 741 (-2.63%) cmd/compile/internal/inline/inlheur slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%) cmd/compile/internal/inline/inlheur [cmd/compile] slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%) cmd/go/internal/modfetch/codehost cmd/go/internal/modfetch/codehost.bzrParseStat 2217 -> 2213 (-0.18%) cmd/link/internal/ld cmd/link/internal/ld.decodetypeStructFieldCount 157 -> 152 (-3.18%) cmd/link/internal/ld.(*Link).address 12559 -> 12495 (-0.51%) cmd/link/internal/ld.(*dodataState).allocateDataSections 18345 -> 18205 (-0.76%) cmd/link/internal/ld.elfshreloc 618 -> 616 (-0.32%) cmd/link/internal/ld.(*deadcodePass).decodetypeMethods 794 -> 779 (-1.89%) cmd/link/internal/ld.(*dodataState).assignDsymsToSection 668 -> 663 (-0.75%) cmd/link/internal/ld.relocSectFn 285 -> 284 (-0.35%) cmd/link/internal/ld.decodetypeIfaceMethodCount 146 -> 144 (-1.37%) cmd/link/internal/ld.decodetypeArrayLen 157 -> 152 (-3.18%) cmd/link/internal/arm64 cmd/link/internal/arm64.gensymlate.func1 895 -> 888 (-0.78%) cmd/go/internal/modload cmd/go/internal/modload.queryProxy.func3 1029 -> 1012 (-1.65%) cmd/go/internal/load cmd/go/internal/load.(*Package).setBuildInfo 8453 -> 8447 (-0.07%) cmd/go/internal/clean cmd/go/internal/clean.runClean 2120 -> 2104 (-0.75%) cmd/compile/internal/ssa cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%) cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%) cmd/compile/internal/ssa.(*debugState).buildLocationLists 3326 -> 3294 (-0.96%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.(*debugState).processValue 9756 -> 9724 (-0.33%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%) cmd/compile/internal/ssa [cmd/compile] cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%) cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%) cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%) file before after Δ % math/bits.s 2352 2354 +2 +0.085% math/bits [cmd/compile].s 2352 2354 +2 +0.085% math.s 35675 35674 -1 -0.003% math [cmd/compile].s 35675 35674 -1 -0.003% runtime.s 577251 577245 -6 -0.001% runtime [cmd/compile].s 642419 642438 +19 +0.003% sort.s 37434 37435 +1 +0.003% strconv.s 48391 48343 -48 -0.099% sort [cmd/compile].s 37434 37435 +1 +0.003% bufio.s 21386 21418 +32 +0.150% strconv [cmd/compile].s 48391 48343 -48 -0.099% image.s 34978 35022 +44 +0.126% regexp/syntax.s 81719 81781 +62 +0.076% time.s 94341 94184 -157 -0.166% regexp.s 60411 60399 -12 -0.020% bufio [cmd/compile].s 21512 21544 +32 +0.149% encoding/binary.s 34062 34087 +25 +0.073% regexp/syntax [cmd/compile].s 81719 81781 +62 +0.076% encoding/base64.s 11907 11903 -4 -0.034% time [cmd/compile].s 94341 94184 -157 -0.166% index/suffixarray.s 41633 41527 -106 -0.255% os.s 101770 101738 -32 -0.031% regexp [cmd/compile].s 60411 60399 -12 -0.020% encoding/binary [cmd/compile].s 37173 37198 +25 +0.067% encoding/base64 [cmd/compile].s 11907 11903 -4 -0.034% os/exec.s 23900 23907 +7 +0.029% encoding/hex.s 6038 6030 -8 -0.132% crypto/des.s 5073 5056 -17 -0.335% os [cmd/compile].s 102030 101998 -32 -0.031% vendor/golang.org/x/net/http2/hpack.s 22027 22033 +6 +0.027% math/big.s 164808 164753 -55 -0.033% cmd/vendor/golang.org/x/sys/unix.s 121450 121444 -6 -0.005% encoding/json.s 110294 110287 -7 -0.006% testing.s 115303 115281 -22 -0.019% archive/zip.s 65329 65325 -4 -0.006% os/user.s 10078 10080 +2 +0.020% encoding/gob.s 143788 143783 -5 -0.003% crypto/elliptic.s 30686 30704 +18 +0.059% go/doc/comment.s 49401 49433 +32 +0.065% debug/buildinfo.s 9095 9085 -10 -0.110% image/png.s 36113 36081 -32 -0.089% archive/tar.s 71994 71897 -97 -0.135% crypto/internal/cryptotest.s 60872 60849 -23 -0.038% internal/pkgbits.s 20441 20429 -12 -0.059% testing/quick.s 8236 8235 -1 -0.012% log/slog.s 77568 77558 -10 -0.013% internal/trace/internal/oldtrace.s 52885 52896 +11 +0.021% runtime/pprof.s 123978 123969 -9 -0.007% internal/coverage/cfile.s 25198 25184 -14 -0.056% cmd/internal/objabi.s 19954 19946 -8 -0.040% crypto/ecdsa.s 29159 29141 -18 -0.062% log/slog/internal/benchmarks.s 6694 6695 +1 +0.015% net.s 299569 299503 -66 -0.022% os/exec [cmd/compile].s 23888 23895 +7 +0.029% internal/trace.s 179226 179240 +14 +0.008% internal/fuzz.s 86190 86191 +1 +0.001% crypto/x509.s 177195 177164 -31 -0.017% cmd/internal/obj/s390x.s 121642 121610 -32 -0.026% cmd/internal/obj/ppc64.s 140118 140122 +4 +0.003% encoding/hex [cmd/compile].s 6149 6141 -8 -0.130% cmd/internal/objabi [cmd/compile].s 19954 19946 -8 -0.040% cmd/internal/obj/arm64.s 158523 158555 +32 +0.020% go/doc/comment [cmd/compile].s 49512 49544 +32 +0.065% math/big [cmd/compile].s 166394 166339 -55 -0.033% encoding/json [cmd/compile].s 110712 110705 -7 -0.006% cmd/covdata.s 39699 39687 -12 -0.030% runtime/pprof [cmd/compile].s 125209 125200 -9 -0.007% cmd/compile/internal/syntax.s 181755 181736 -19 -0.010% cmd/dist.s 177893 177861 -32 -0.018% crypto/tls.s 389157 389113 -44 -0.011% internal/pkgbits [cmd/compile].s 41644 41632 -12 -0.029% cmd/compile/internal/syntax [cmd/compile].s 196105 196086 -19 -0.010% cmd/compile/internal/types.s 71315 71345 +30 +0.042% cmd/internal/obj/s390x [cmd/compile].s 121733 121701 -32 -0.026% cmd/go/internal/trace.s 4796 4760 -36 -0.751% cmd/internal/obj/arm64 [cmd/compile].s 168120 168147 +27 +0.016% cmd/internal/obj/ppc64 [cmd/compile].s 140219 140223 +4 +0.003% cmd/internal/script.s 83442 83436 -6 -0.007% cmd/link/internal/loader.s 93299 93294 -5 -0.005% net/http.s 620639 620472 -167 -0.027% net/http/pprof.s 35016 35013 -3 -0.009% cmd/compile/internal/coverage.s 6668 6667 -1 -0.015% cmd/vendor/golang.org/x/telemetry/internal/upload.s 34210 34148 -62 -0.181% cmd/compile/internal/coverage [cmd/compile].s 6664 6663 -1 -0.015% cmd/vendor/golang.org/x/text/language.s 48077 48074 -3 -0.006% cmd/go/internal/vcweb.s 45193 45189 -4 -0.009% cmd/go/internal/vcs.s 44749 44729 -20 -0.045% cmd/compile/internal/inline/inlheur.s 83758 83742 -16 -0.019% cmd/compile/internal/inline/inlheur [cmd/compile].s 84773 84757 -16 -0.019% cmd/go/internal/modfetch/codehost.s 89098 89094 -4 -0.004% cmd/trace.s 257550 257564 +14 +0.005% cmd/link/internal/ld.s 641945 641706 -239 -0.037% cmd/link/internal/arm64.s 34805 34798 -7 -0.020% cmd/go/internal/modload.s 328971 328954 -17 -0.005% cmd/go/internal/load.s 178877 178871 -6 -0.003% cmd/go/internal/clean.s 11006 10990 -16 -0.145% cmd/compile/internal/ssa.s 3552843 3553347 +504 +0.014% cmd/compile/internal/ssa [cmd/compile].s 3752511 3753123 +612 +0.016% total 36179015 36178687 -328 -0.001% Change-Id: I251c2898ccf3c9931d162d87dabbd49cf4ec73a5 Reviewed-on: https://go-review.googlesource.com/c/go/+/641757 Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
677 lines
16 KiB
Go
677 lines
16 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// This file contains codegen tests related to arithmetic
|
|
// simplifications and optimizations on integer types.
|
|
// For codegen tests on float types, see floats.go.
|
|
|
|
// ----------------- //
|
|
// Addition //
|
|
// ----------------- //
|
|
|
|
func AddLargeConst(a uint64, out []uint64) {
|
|
// ppc64x/power10:"ADD\t[$]4294967296,"
|
|
// ppc64x/power9:"MOVD\t[$]1", "SLD\t[$]32" "ADD\tR[0-9]*"
|
|
// ppc64x/power8:"MOVD\t[$]1", "SLD\t[$]32" "ADD\tR[0-9]*"
|
|
out[0] = a + 0x100000000
|
|
// ppc64x/power10:"ADD\t[$]-8589934592,"
|
|
// ppc64x/power9:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*"
|
|
// ppc64x/power8:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*"
|
|
out[1] = a + 0xFFFFFFFE00000000
|
|
// ppc64x/power10:"ADD\t[$]1234567,"
|
|
// ppc64x/power9:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
|
|
// ppc64x/power8:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
|
|
out[2] = a + 1234567
|
|
// ppc64x/power10:"ADD\t[$]-1234567,"
|
|
// ppc64x/power9:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
|
|
// ppc64x/power8:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
|
|
out[3] = a - 1234567
|
|
// ppc64x/power10:"ADD\t[$]2147450879,"
|
|
// ppc64x/power9:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
|
|
// ppc64x/power8:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
|
|
out[4] = a + 0x7FFF7FFF
|
|
// ppc64x/power10:"ADD\t[$]-2147483647,"
|
|
// ppc64x/power9:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
|
|
// ppc64x/power8:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
|
|
out[5] = a - 2147483647
|
|
// ppc64x:"ADDIS\t[$]-32768,", ^"ADD\t"
|
|
out[6] = a - 2147483648
|
|
// ppc64x:"ADD\t[$]2147450880,", ^"ADDIS\t"
|
|
out[7] = a + 0x7FFF8000
|
|
// ppc64x:"ADD\t[$]-32768,", ^"ADDIS\t"
|
|
out[8] = a - 32768
|
|
// ppc64x/power10:"ADD\t[$]-32769,"
|
|
// ppc64x/power9:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
|
|
// ppc64x/power8:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
|
|
out[9] = a - 32769
|
|
}
|
|
|
|
// ----------------- //
|
|
// Subtraction //
|
|
// ----------------- //
|
|
|
|
var ef int
|
|
|
|
func SubMem(arr []int, b, c, d int) int {
|
|
// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
|
|
// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
|
|
arr[2] -= b
|
|
// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
|
|
// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
|
|
arr[3] -= b
|
|
// 386:`DECL\s16\([A-Z]+\)`
|
|
arr[4]--
|
|
// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
|
|
arr[5] -= 20
|
|
// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
|
|
ef -= arr[b]
|
|
// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
|
|
arr[c] -= b
|
|
// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
|
|
arr[d] -= 15
|
|
// 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
|
|
arr[b]--
|
|
// amd64:`DECQ\s64\([A-Z]+\)`
|
|
arr[8]--
|
|
// 386:"SUBL\t4"
|
|
// amd64:"SUBQ\t8"
|
|
return arr[0] - arr[1]
|
|
}
|
|
|
|
func SubFromConst(a int) int {
|
|
// ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
|
|
b := 40 - a
|
|
return b
|
|
}
|
|
|
|
func SubFromConstNeg(a int) int {
|
|
// ppc64x: `ADD\t[$]40,\sR[0-9]+,\sR`
|
|
c := 40 - (-a)
|
|
return c
|
|
}
|
|
|
|
func SubSubFromConst(a int) int {
|
|
// ppc64x: `ADD\t[$]20,\sR[0-9]+,\sR`
|
|
c := 40 - (20 - a)
|
|
return c
|
|
}
|
|
|
|
func AddSubFromConst(a int) int {
|
|
// ppc64x: `SUBC\tR[0-9]+,\s[$]60,\sR`
|
|
c := 40 + (20 - a)
|
|
return c
|
|
}
|
|
|
|
func NegSubFromConst(a int) int {
|
|
// ppc64x: `ADD\t[$]-20,\sR[0-9]+,\sR`
|
|
c := -(20 - a)
|
|
return c
|
|
}
|
|
|
|
func NegAddFromConstNeg(a int) int {
|
|
// ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
|
|
c := -(-40 + a)
|
|
return c
|
|
}
|
|
|
|
func SubSubNegSimplify(a, b int) int {
|
|
// amd64:"NEGQ"
|
|
// ppc64x:"NEG"
|
|
r := (a - b) - a
|
|
return r
|
|
}
|
|
|
|
func SubAddSimplify(a, b int) int {
|
|
// amd64:-"SUBQ",-"ADDQ"
|
|
// ppc64x:-"SUB",-"ADD"
|
|
r := a + (b - a)
|
|
return r
|
|
}
|
|
|
|
func SubAddSimplify2(a, b, c int) (int, int, int, int, int, int) {
|
|
// amd64:-"ADDQ"
|
|
r := (a + b) - (a + c)
|
|
// amd64:-"ADDQ"
|
|
r1 := (a + b) - (c + a)
|
|
// amd64:-"ADDQ"
|
|
r2 := (b + a) - (a + c)
|
|
// amd64:-"ADDQ"
|
|
r3 := (b + a) - (c + a)
|
|
// amd64:-"SUBQ"
|
|
r4 := (a - c) + (c + b)
|
|
// amd64:-"SUBQ"
|
|
r5 := (a - c) + (b + c)
|
|
return r, r1, r2, r3, r4, r5
|
|
}
|
|
|
|
func SubAddNegSimplify(a, b int) int {
|
|
// amd64:"NEGQ",-"ADDQ",-"SUBQ"
|
|
// ppc64x:"NEG",-"ADD",-"SUB"
|
|
r := a - (b + a)
|
|
return r
|
|
}
|
|
|
|
func AddAddSubSimplify(a, b, c int) int {
|
|
// amd64:-"SUBQ"
|
|
// ppc64x:-"SUB"
|
|
r := a + (b + (c - a))
|
|
return r
|
|
}
|
|
|
|
// -------------------- //
|
|
// Multiplication //
|
|
// -------------------- //
|
|
|
|
func Pow2Muls(n1, n2 int) (int, int) {
|
|
// amd64:"SHLQ\t[$]5",-"IMULQ"
|
|
// 386:"SHLL\t[$]5",-"IMULL"
|
|
// arm:"SLL\t[$]5",-"MUL"
|
|
// arm64:"LSL\t[$]5",-"MUL"
|
|
// ppc64x:"SLD\t[$]5",-"MUL"
|
|
a := n1 * 32
|
|
|
|
// amd64:"SHLQ\t[$]6",-"IMULQ"
|
|
// 386:"SHLL\t[$]6",-"IMULL"
|
|
// arm:"SLL\t[$]6",-"MUL"
|
|
// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
|
|
// ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
|
|
b := -64 * n2
|
|
|
|
return a, b
|
|
}
|
|
|
|
func Mul_2(n1 int32, n2 int64) (int32, int64) {
|
|
// amd64:"ADDL", -"SHLL"
|
|
a := n1 * 2
|
|
// amd64:"ADDQ", -"SHLQ"
|
|
b := n2 * 2
|
|
|
|
return a, b
|
|
}
|
|
|
|
func Mul_96(n int) int {
|
|
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
|
|
// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
|
|
// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
|
|
// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
|
|
// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
|
|
return n * 96
|
|
}
|
|
|
|
func Mul_n120(n int) int {
|
|
// s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
|
|
return n * -120
|
|
}
|
|
|
|
func MulMemSrc(a []uint32, b []float32) {
|
|
// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
|
|
a[0] *= a[1]
|
|
// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
|
|
// amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
|
|
b[0] *= b[1]
|
|
}
|
|
|
|
// Multiplications merging tests
|
|
|
|
func MergeMuls1(n int) int {
|
|
// amd64:"IMUL3Q\t[$]46"
|
|
// 386:"IMUL3L\t[$]46"
|
|
// ppc64x:"MULLD\t[$]46"
|
|
return 15*n + 31*n // 46n
|
|
}
|
|
|
|
func MergeMuls2(n int) int {
|
|
// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
|
|
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
|
|
// ppc64x/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29"
|
|
// ppc64x/power8:"MULLD\t[$]23","ADD\t[$]29"
|
|
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
|
|
}
|
|
|
|
func MergeMuls3(a, n int) int {
|
|
// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
|
|
// ppc64x:"ADD\t[$]19",-"MULLD\t[$]19"
|
|
return a*n + 19*n // (a+19)n
|
|
}
|
|
|
|
func MergeMuls4(n int) int {
|
|
// amd64:"IMUL3Q\t[$]14"
|
|
// 386:"IMUL3L\t[$]14"
|
|
// ppc64x:"MULLD\t[$]14"
|
|
return 23*n - 9*n // 14n
|
|
}
|
|
|
|
func MergeMuls5(a, n int) int {
|
|
// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
|
|
// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
|
|
// ppc64x:"ADD\t[$]-19",-"MULLD\t[$]19"
|
|
return a*n - 19*n // (a-19)n
|
|
}
|
|
|
|
// -------------- //
|
|
// Division //
|
|
// -------------- //
|
|
|
|
func DivMemSrc(a []float64) {
|
|
// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
|
|
// amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
|
|
a[0] /= a[1]
|
|
}
|
|
|
|
func Pow2Divs(n1 uint, n2 int) (uint, int) {
|
|
// 386:"SHRL\t[$]5",-"DIVL"
|
|
// amd64:"SHRQ\t[$]5",-"DIVQ"
|
|
// arm:"SRL\t[$]5",-".*udiv"
|
|
// arm64:"LSR\t[$]5",-"UDIV"
|
|
// ppc64x:"SRD"
|
|
a := n1 / 32 // unsigned
|
|
|
|
// amd64:"SARQ\t[$]6",-"IDIVQ"
|
|
// 386:"SARL\t[$]6",-"IDIVL"
|
|
// arm:"SRA\t[$]6",-".*udiv"
|
|
// arm64:"ASR\t[$]6",-"SDIV"
|
|
// ppc64x:"SRAD"
|
|
b := n2 / 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant divisions get turned into MULs
|
|
func ConstDivs(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
|
|
// arm64:`MOVD`,`UMULH`,-`DIV`
|
|
// arm:`MOVW`,`MUL`,-`.*udiv`
|
|
a := n1 / 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
|
|
// arm64:`SMULH`,-`DIV`
|
|
// arm:`MOVW`,`MUL`,-`.*udiv`
|
|
b := n2 / 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
func FloatDivs(a []float32) float32 {
|
|
// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
|
|
// 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
|
|
return a[1] / a[2]
|
|
}
|
|
|
|
func Pow2Mods(n1 uint, n2 int) (uint, int) {
|
|
// 386:"ANDL\t[$]31",-"DIVL"
|
|
// amd64:"ANDL\t[$]31",-"DIVQ"
|
|
// arm:"AND\t[$]31",-".*udiv"
|
|
// arm64:"AND\t[$]31",-"UDIV"
|
|
// ppc64x:"RLDICL"
|
|
a := n1 % 32 // unsigned
|
|
|
|
// 386:"SHRL",-"IDIVL"
|
|
// amd64:"SHRQ",-"IDIVQ"
|
|
// arm:"SRA",-".*udiv"
|
|
// arm64:"ASR",-"REM"
|
|
// ppc64x:"SRAD"
|
|
b := n2 % 64 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that signed divisibility checks get converted to AND on low bits
|
|
func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
|
|
// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
|
|
// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
|
|
// arm:"AND\t[$]63",-".*udiv",-"SRA"
|
|
// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
|
|
// ppc64x:"ANDCC",-"RLDICL",-"SRAD",-"CMP"
|
|
a := n1%64 == 0 // signed divisible
|
|
|
|
// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
|
|
// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
|
|
// arm:"AND\t[$]63",-".*udiv",-"SRA"
|
|
// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
|
|
// ppc64x:"ANDCC",-"RLDICL",-"SRAD",-"CMP"
|
|
b := n2%64 != 0 // signed indivisible
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that constant modulo divs get turned into MULs
|
|
func ConstMods(n1 uint, n2 int) (uint, int) {
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
|
|
// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
|
|
// arm64:`MOVD`,`UMULH`,-`DIV`
|
|
// arm:`MOVW`,`MUL`,-`.*udiv`
|
|
a := n1 % 17 // unsigned
|
|
|
|
// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
|
|
// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
|
|
// arm64:`SMULH`,-`DIV`
|
|
// arm:`MOVW`,`MUL`,-`.*udiv`
|
|
b := n2 % 17 // signed
|
|
|
|
return a, b
|
|
}
|
|
|
|
// Check that divisibility checks x%c==0 are converted to MULs and rotates
|
|
func DivisibleU(n uint) (bool, bool) {
|
|
// amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
|
|
// 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVQ"
|
|
// arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV"
|
|
// arm:"MUL","CMP\t[$]715827882",-".*udiv"
|
|
// ppc64x:"MULLD","ROTL\t[$]63"
|
|
even := n%6 == 0
|
|
|
|
// amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
|
|
// 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVQ"
|
|
// arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV"
|
|
// arm:"MUL","CMP\t[$]226050910",-".*udiv"
|
|
// ppc64x:"MULLD",-"ROTL"
|
|
odd := n%19 == 0
|
|
|
|
return even, odd
|
|
}
|
|
|
|
func Divisible(n int) (bool, bool) {
|
|
// amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
|
|
// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ"
|
|
// arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ADD\tR","ROR",-"DIV"
|
|
// arm:"MUL","ADD\t[$]715827882",-".*udiv"
|
|
// ppc64x/power8:"MULLD","ADD","ROTL\t[$]63"
|
|
// ppc64x/power9:"MADDLD","ROTL\t[$]63"
|
|
even := n%6 == 0
|
|
|
|
// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
|
|
// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ"
|
|
// arm64:"MUL","MOVD\t[$]485440633518672410","ADD",-"ROR",-"DIV"
|
|
// arm:"MUL","ADD\t[$]113025455",-".*udiv"
|
|
// ppc64x/power8:"MULLD","ADD",-"ROTL"
|
|
// ppc64x/power9:"MADDLD",-"ROTL"
|
|
odd := n%19 == 0
|
|
|
|
return even, odd
|
|
}
|
|
|
|
// Check that fix-up code is not generated for divisions where it has been proven that
|
|
// that the divisor is not -1 or that the dividend is > MinIntNN.
|
|
func NoFix64A(divr int64) (int64, int64) {
|
|
var d int64 = 42
|
|
var e int64 = 84
|
|
if divr > 5 {
|
|
d /= divr // amd64:-"JMP"
|
|
e %= divr // amd64:-"JMP"
|
|
// The following statement is to avoid conflict between the above check
|
|
// and the normal JMP generated at the end of the block.
|
|
d += e
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix64B(divd int64) (int64, int64) {
|
|
var d int64
|
|
var e int64
|
|
var divr int64 = -1
|
|
if divd > -9223372036854775808 {
|
|
d = divd / divr // amd64:-"JMP"
|
|
e = divd % divr // amd64:-"JMP"
|
|
d += e
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix32A(divr int32) (int32, int32) {
|
|
var d int32 = 42
|
|
var e int32 = 84
|
|
if divr > 5 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d /= divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e %= divr
|
|
d += e
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix32B(divd int32) (int32, int32) {
|
|
var d int32
|
|
var e int32
|
|
var divr int32 = -1
|
|
if divd > -2147483648 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d = divd / divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e = divd % divr
|
|
d += e
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix16A(divr int16) (int16, int16) {
|
|
var d int16 = 42
|
|
var e int16 = 84
|
|
if divr > 5 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d /= divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e %= divr
|
|
d += e
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
func NoFix16B(divd int16) (int16, int16) {
|
|
var d int16
|
|
var e int16
|
|
var divr int16 = -1
|
|
if divd > -32768 {
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
d = divd / divr
|
|
// amd64:-"JMP"
|
|
// 386:-"JMP"
|
|
e = divd % divr
|
|
d += e
|
|
}
|
|
return d, e
|
|
}
|
|
|
|
// Check that len() and cap() calls divided by powers of two are
|
|
// optimized into shifts and ands
|
|
|
|
func LenDiv1(a []int) int {
|
|
// 386:"SHRL\t[$]10"
|
|
// amd64:"SHRQ\t[$]10"
|
|
// arm64:"LSR\t[$]10",-"SDIV"
|
|
// arm:"SRL\t[$]10",-".*udiv"
|
|
// ppc64x:"SRD"\t[$]10"
|
|
return len(a) / 1024
|
|
}
|
|
|
|
func LenDiv2(s string) int {
|
|
// 386:"SHRL\t[$]11"
|
|
// amd64:"SHRQ\t[$]11"
|
|
// arm64:"LSR\t[$]11",-"SDIV"
|
|
// arm:"SRL\t[$]11",-".*udiv"
|
|
// ppc64x:"SRD\t[$]11"
|
|
return len(s) / (4097 >> 1)
|
|
}
|
|
|
|
func LenMod1(a []int) int {
|
|
// 386:"ANDL\t[$]1023"
|
|
// amd64:"ANDL\t[$]1023"
|
|
// arm64:"AND\t[$]1023",-"SDIV"
|
|
// arm/6:"AND",-".*udiv"
|
|
// arm/7:"BFC",-".*udiv",-"AND"
|
|
// ppc64x:"RLDICL"
|
|
return len(a) % 1024
|
|
}
|
|
|
|
func LenMod2(s string) int {
|
|
// 386:"ANDL\t[$]2047"
|
|
// amd64:"ANDL\t[$]2047"
|
|
// arm64:"AND\t[$]2047",-"SDIV"
|
|
// arm/6:"AND",-".*udiv"
|
|
// arm/7:"BFC",-".*udiv",-"AND"
|
|
// ppc64x:"RLDICL"
|
|
return len(s) % (4097 >> 1)
|
|
}
|
|
|
|
func CapDiv(a []int) int {
|
|
// 386:"SHRL\t[$]12"
|
|
// amd64:"SHRQ\t[$]12"
|
|
// arm64:"LSR\t[$]12",-"SDIV"
|
|
// arm:"SRL\t[$]12",-".*udiv"
|
|
// ppc64x:"SRD\t[$]12"
|
|
return cap(a) / ((1 << 11) + 2048)
|
|
}
|
|
|
|
func CapMod(a []int) int {
|
|
// 386:"ANDL\t[$]4095"
|
|
// amd64:"ANDL\t[$]4095"
|
|
// arm64:"AND\t[$]4095",-"SDIV"
|
|
// arm/6:"AND",-".*udiv"
|
|
// arm/7:"BFC",-".*udiv",-"AND"
|
|
// ppc64x:"RLDICL"
|
|
return cap(a) % ((1 << 11) + 2048)
|
|
}
|
|
|
|
func AddMul(x int) int {
|
|
// amd64:"LEAQ\t1"
|
|
return 2*x + 1
|
|
}
|
|
|
|
func MULA(a, b, c uint32) (uint32, uint32, uint32) {
|
|
// arm:`MULA`,-`MUL\s`
|
|
// arm64:`MADDW`,-`MULW`
|
|
r0 := a*b + c
|
|
// arm:`MULA`,-`MUL\s`
|
|
// arm64:`MADDW`,-`MULW`
|
|
r1 := c*79 + a
|
|
// arm:`ADD`,-`MULA`,-`MUL\s`
|
|
// arm64:`ADD`,-`MADD`,-`MULW`
|
|
// ppc64x:`ADD`,-`MULLD`
|
|
r2 := b*64 + c
|
|
return r0, r1, r2
|
|
}
|
|
|
|
func MULS(a, b, c uint32) (uint32, uint32, uint32) {
|
|
// arm/7:`MULS`,-`MUL\s`
|
|
// arm/6:`SUB`,`MUL\s`,-`MULS`
|
|
// arm64:`MSUBW`,-`MULW`
|
|
r0 := c - a*b
|
|
// arm/7:`MULS`,-`MUL\s`
|
|
// arm/6:`SUB`,`MUL\s`,-`MULS`
|
|
// arm64:`MSUBW`,-`MULW`
|
|
r1 := a - c*79
|
|
// arm/7:`SUB`,-`MULS`,-`MUL\s`
|
|
// arm64:`SUB`,-`MSUBW`,-`MULW`
|
|
// ppc64x:`SUB`,-`MULLD`
|
|
r2 := c - b*64
|
|
return r0, r1, r2
|
|
}
|
|
|
|
func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
|
|
// amd64:`INCL`
|
|
a++
|
|
// amd64:`DECL`
|
|
b--
|
|
// amd64:`SUBL.*-128`
|
|
c += 128
|
|
return a, b, c
|
|
}
|
|
|
|
// Divide -> shift rules usually require fixup for negative inputs.
|
|
// If the input is non-negative, make sure the fixup is eliminated.
|
|
func divInt(v int64) int64 {
|
|
if v < 0 {
|
|
return 0
|
|
}
|
|
// amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
|
|
return v / 512
|
|
}
|
|
|
|
// The reassociate rules "x - (z + C) -> (x - z) - C" and
|
|
// "(z + C) -x -> C + (z - x)" can optimize the following cases.
|
|
func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
|
|
// arm64:"SUB","ADD\t[$]2"
|
|
// ppc64x:"SUB","ADD\t[$]2"
|
|
r0 := (i0 + 3) - (j0 + 1)
|
|
// arm64:"SUB","SUB\t[$]4"
|
|
// ppc64x:"SUB","ADD\t[$]-4"
|
|
r1 := (i1 - 3) - (j1 + 1)
|
|
// arm64:"SUB","ADD\t[$]4"
|
|
// ppc64x:"SUB","ADD\t[$]4"
|
|
r2 := (i2 + 3) - (j2 - 1)
|
|
// arm64:"SUB","SUB\t[$]2"
|
|
// ppc64x:"SUB","ADD\t[$]-2"
|
|
r3 := (i3 - 3) - (j3 - 1)
|
|
return r0, r1, r2, r3
|
|
}
|
|
|
|
// The reassociate rules "x - (z + C) -> (x - z) - C" and
|
|
// "(C - z) - x -> C - (z + x)" can optimize the following cases.
|
|
func constantFold2(i0, j0, i1, j1 int) (int, int) {
|
|
// arm64:"ADD","MOVD\t[$]2","SUB"
|
|
// ppc64x: `SUBC\tR[0-9]+,\s[$]2,\sR`
|
|
r0 := (3 - i0) - (j0 + 1)
|
|
// arm64:"ADD","MOVD\t[$]4","SUB"
|
|
// ppc64x: `SUBC\tR[0-9]+,\s[$]4,\sR`
|
|
r1 := (3 - i1) - (j1 - 1)
|
|
return r0, r1
|
|
}
|
|
|
|
func constantFold3(i, j int) int {
|
|
// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
|
|
// ppc64x:"MULLD\t[$]30","MULLD"
|
|
r := (5 * i) * (6 * j)
|
|
return r
|
|
}
|
|
|
|
// ----------------- //
|
|
// Integer Min/Max //
|
|
// ----------------- //
|
|
|
|
func Int64Min(a, b int64) int64 {
|
|
// amd64: "CMPQ","CMOVQLT"
|
|
// arm64: "CMP","CSEL"
|
|
// riscv64/rva20u64:"BLT\t"
|
|
// riscv64/rva22u64:"MIN\t"
|
|
return min(a, b)
|
|
}
|
|
|
|
func Int64Max(a, b int64) int64 {
|
|
// amd64: "CMPQ","CMOVQGT"
|
|
// arm64: "CMP","CSEL"
|
|
// riscv64/rva20u64:"BLT\t"
|
|
// riscv64/rva22u64:"MAX\t"
|
|
return max(a, b)
|
|
}
|
|
|
|
func Uint64Min(a, b uint64) uint64 {
|
|
// amd64: "CMPQ","CMOVQCS"
|
|
// arm64: "CMP","CSEL"
|
|
// riscv64/rva20u64:"BLTU"
|
|
// riscv64/rva22u64:"MINU"
|
|
return min(a, b)
|
|
}
|
|
|
|
func Uint64Max(a, b uint64) uint64 {
|
|
// amd64: "CMPQ","CMOVQHI"
|
|
// arm64: "CMP","CSEL"
|
|
// riscv64/rva20u64:"BLTU"
|
|
// riscv64/rva22u64:"MAXU"
|
|
return max(a, b)
|
|
}
|