mirror of
https://github.com/golang/go.git
synced 2025-05-05 15:43:04 +00:00
cmd/compile: prefer an add when shifting left by 1
ADD(Q|L) has generally twice the throughput.

Came up in CL 626998.

Throughput by arch:

Zen 4:
    SHLL (R64, 1): 0.5
    ADD (R64, R64): 0.25

Intel Alder Lake:
    SHLL (R64, 1): 0.5
    ADD (R64, R64): 0.2

Intel Haswell:
    SHLL (R64, 1): 0.5
    ADD (R64, R64): 0.25

Also include a minor opt for:

    (x + x) << c -> x << (c + 1)

Before this, the code:

    func addShift(x int64) int64 {
        return (x + x) << 1
    }

emitted two instructions:

    ADDQ AX, AX
    SHLQ $1, AX

but we can do it in a single shift:

    SHLQ $2, AX

Add a codegen test for clearing the last bit.

compilecmp linux/amd64:

math
math.sqrt 243 -> 242 (-0.41%)

math [cmd/compile]
math.sqrt 243 -> 242 (-0.41%)

runtime
runtime.selectgo 5455 -> 5445 (-0.18%)
runtime.sysargs 665 -> 662 (-0.45%)
runtime.isPinned 145 -> 141 (-2.76%)
runtime.atoi64 198 -> 194 (-2.02%)
runtime.setPinned 714 -> 709 (-0.70%)

runtime [cmd/compile]
runtime.sysargs 665 -> 662 (-0.45%)
runtime.setPinned 714 -> 709 (-0.70%)
runtime.atoi64 198 -> 194 (-2.02%)
runtime.isPinned 145 -> 141 (-2.76%)

strconv
strconv.computeBounds 109 -> 107 (-1.83%)
strconv.FormatInt 201 -> 197 (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%)
strconv.small 144 -> 134 (-6.94%)
strconv.AppendInt 357 -> 344 (-3.64%)
strconv.ryuDigits32 490 -> 488 (-0.41%)
strconv.AppendUint 342 -> 340 (-0.58%)

strconv [cmd/compile]
strconv.FormatInt 201 -> 197 (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%)
strconv.ryuDigits32 490 -> 488 (-0.41%)
strconv.AppendUint 342 -> 340 (-0.58%)
strconv.computeBounds 109 -> 107 (-1.83%)
strconv.small 144 -> 134 (-6.94%)
strconv.AppendInt 357 -> 344 (-3.64%)

image
image.Rectangle.Inset 101 -> 97 (-3.96%)

regexp/syntax
regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%)
regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%)
regexp/syntax.ranges.Less 153 -> 150 (-1.96%)
regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%)

time
time.Time.Before 179 -> 161 (-10.06%)
time.Time.Compare 189 -> 166 (-12.17%)
time.Time.Sub 444 -> 425 (-4.28%)
time.Time.UnixMicro 106 -> 95 (-10.38%)
time.div 592 -> 587 (-0.84%) time.Time.UnixNano 85 -> 78 (-8.24%) time.(*Time).UnixMilli 141 -> 140 (-0.71%) time.Time.UnixMilli 106 -> 95 (-10.38%) time.(*Time).UnixMicro 141 -> 140 (-0.71%) time.Time.After 179 -> 161 (-10.06%) time.Time.Equal 170 -> 150 (-11.76%) time.Time.AppendBinary 766 -> 757 (-1.17%) time.Time.IsZero 74 -> 66 (-10.81%) time.(*Time).UnixNano 124 -> 113 (-8.87%) time.(*Time).IsZero 113 -> 108 (-4.42%) regexp regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%) regexp.QuoteMeta 485 -> 469 (-3.30%) regexp/syntax [cmd/compile] regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%) regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%) regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%) regexp/syntax.ranges.Less 153 -> 150 (-1.96%) encoding/base64 encoding/base64.decodedLen 92 -> 90 (-2.17%) encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%) time [cmd/compile] time.(*Time).IsZero 113 -> 108 (-4.42%) time.Time.IsZero 74 -> 66 (-10.81%) time.(*Time).UnixNano 124 -> 113 (-8.87%) time.Time.UnixMilli 106 -> 95 (-10.38%) time.Time.Equal 170 -> 150 (-11.76%) time.Time.UnixMicro 106 -> 95 (-10.38%) time.(*Time).UnixMicro 141 -> 140 (-0.71%) time.Time.Before 179 -> 161 (-10.06%) time.Time.UnixNano 85 -> 78 (-8.24%) time.Time.AppendBinary 766 -> 757 (-1.17%) time.div 592 -> 587 (-0.84%) time.Time.After 179 -> 161 (-10.06%) time.Time.Compare 189 -> 166 (-12.17%) time.(*Time).UnixMilli 141 -> 140 (-0.71%) time.Time.Sub 444 -> 425 (-4.28%) index/suffixarray index/suffixarray.sais_8_32 1677 -> 1645 (-1.91%) index/suffixarray.sais_32 1677 -> 1645 (-1.91%) index/suffixarray.sais_64 1677 -> 1654 (-1.37%) index/suffixarray.sais_8_64 1677 -> 1654 (-1.37%) index/suffixarray.writeInt 249 -> 247 (-0.80%) os os.Expand 1070 -> 1051 (-1.78%) os.Chtimes 787 -> 774 (-1.65%) regexp [cmd/compile] regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%) regexp.QuoteMeta 485 -> 469 (-3.30%) encoding/base64 [cmd/compile] encoding/base64.decodedLen 92 -> 90 
(-2.17%) encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%) encoding/hex encoding/hex.Encode 138 -> 136 (-1.45%) encoding/hex.(*decoder).Read 830 -> 824 (-0.72%) crypto/des crypto/des.initFeistelBox 235 -> 229 (-2.55%) crypto/des.cryptBlock 549 -> 538 (-2.00%) os [cmd/compile] os.Chtimes 787 -> 774 (-1.65%) os.Expand 1070 -> 1051 (-1.78%) math/big math/big.newFloat 238 -> 223 (-6.30%) math/big.nat.mul 2138 -> 2122 (-0.75%) math/big.karatsubaSqr 1372 -> 1369 (-0.22%) math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%) math/big.basicSqr 1032 -> 1017 (-1.45%) cmd/vendor/golang.org/x/sys/unix cmd/vendor/golang.org/x/sys/unix.TimeToTimespec 72 -> 66 (-8.33%) encoding/json encoding/json.Indent 404 -> 403 (-0.25%) encoding/json.MarshalIndent 303 -> 297 (-1.98%) testing testing.(*T).Deadline 84 -> 82 (-2.38%) testing.(*M).Run 3545 -> 3525 (-0.56%) archive/zip archive/zip.headerFileInfo.ModTime 229 -> 223 (-2.62%) encoding/gob encoding/gob.(*encoderState).encodeInt 474 -> 469 (-1.05%) crypto/elliptic crypto/elliptic.Marshal 728 -> 714 (-1.92%) debug/buildinfo debug/buildinfo.readString 325 -> 315 (-3.08%) image/png image/png.(*decoder).readImagePass 10866 -> 10834 (-0.29%) archive/tar archive/tar.Header.allowedFormats.func3 1768 -> 1736 (-1.81%) archive/tar.formatPAXTime 389 -> 358 (-7.97%) archive/tar.(*Writer).writeGNUHeader 741 -> 727 (-1.89%) archive/tar.readGNUSparseMap0x1 709 -> 695 (-1.97%) archive/tar.(*Writer).templateV7Plus 915 -> 909 (-0.66%) crypto/internal/cryptotest crypto/internal/cryptotest.TestHash.func4 890 -> 879 (-1.24%) crypto/internal/cryptotest.TestStream.func6.1 646 -> 645 (-0.15%) crypto/internal/cryptotest.testCipher.func3 1300 -> 1289 (-0.85%) internal/pkgbits internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%) internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%) testing/quick testing/quick.(*Config).getRand 316 -> 315 (-0.32%) log/slog log/slog.TimeValue 489 -> 479 (-2.04%) runtime/pprof runtime/pprof.(*profileBuilder).build 2341 -> 2322 
(-0.81%) internal/coverage/cfile internal/coverage/cfile.(*emitState).openMetaFile 824 -> 822 (-0.24%) internal/coverage/cfile.(*emitState).openCounterFile 904 -> 892 (-1.33%) cmd/internal/objabi cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%) crypto/ecdsa crypto/ecdsa.pointFromAffine 1162 -> 1144 (-1.55%) net net.minNonzeroTime 313 -> 308 (-1.60%) net.cgoLookupAddrPTR 812 -> 797 (-1.85%) net.(*IPNet).String 851 -> 827 (-2.82%) net.IP.AppendText 488 -> 471 (-3.48%) net.IPMask.String 281 -> 270 (-3.91%) net.partialDeadline 374 -> 366 (-2.14%) net.hexString 249 -> 240 (-3.61%) net.IP.String 454 -> 453 (-0.22%) internal/fuzz internal/fuzz.newPcgRand 240 -> 234 (-2.50%) crypto/x509 crypto/x509.(*Certificate).isValid 2642 -> 2611 (-1.17%) cmd/internal/obj/s390x cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%) encoding/hex [cmd/compile] encoding/hex.(*decoder).Read 830 -> 824 (-0.72%) encoding/hex.Encode 138 -> 136 (-1.45%) cmd/internal/objabi [cmd/compile] cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%) math/big [cmd/compile] math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%) math/big.nat.mul 2138 -> 2122 (-0.75%) math/big.karatsubaSqr 1372 -> 1369 (-0.22%) math/big.basicSqr 1032 -> 1017 (-1.45%) math/big.newFloat 238 -> 223 (-6.30%) encoding/json [cmd/compile] encoding/json.MarshalIndent 303 -> 297 (-1.98%) encoding/json.Indent 404 -> 403 (-0.25%) cmd/covdata main.(*metaMerge).emitCounters 985 -> 973 (-1.22%) runtime/pprof [cmd/compile] runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%) cmd/compile/internal/syntax cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%) cmd/dist main.runInstall 19081 -> 19049 (-0.17%) crypto/tls crypto/tls.extractPadding 176 -> 175 (-0.57%) slices.Clone[[]crypto/tls.SignatureScheme,crypto/tls.SignatureScheme] 253 -> 247 (-2.37%) slices.Clone[[]uint16,uint16] 253 -> 247 (-2.37%) slices.Clone[[]crypto/tls.CurveID,crypto/tls.CurveID] 253 -> 247 (-2.37%) crypto/tls.(*Config).cipherSuites 335 -> 326 (-2.69%) 
slices.DeleteFunc[go.shape.[]crypto/tls.CurveID,go.shape.uint16] 437 -> 434 (-0.69%) crypto/tls.dial 1349 -> 1339 (-0.74%) slices.DeleteFunc[go.shape.[]uint16,go.shape.uint16] 437 -> 434 (-0.69%) internal/pkgbits [cmd/compile] internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%) internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%) cmd/compile/internal/syntax [cmd/compile] cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%) cmd/internal/obj/s390x [cmd/compile] cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%) cmd/go/internal/trace cmd/go/internal/trace.Flow 910 -> 886 (-2.64%) cmd/go/internal/trace.(*Span).Done 311 -> 304 (-2.25%) cmd/go/internal/trace.StartSpan 620 -> 615 (-0.81%) cmd/internal/script cmd/internal/script.(*Engine).Execute.func2 534 -> 528 (-1.12%) cmd/link/internal/loader cmd/link/internal/loader.(*Loader).SetSymSect 344 -> 338 (-1.74%) net/http net/http.(*Transport).queueForIdleConn 1797 -> 1766 (-1.73%) net/http.(*Transport).getConn 2149 -> 2131 (-0.84%) net/http.(*http2ClientConn).tooIdleLocked 207 -> 197 (-4.83%) net/http.(*http2responseWriter).SetWriteDeadline.func1 520 -> 508 (-2.31%) net/http.(*Cookie).Valid 837 -> 818 (-2.27%) net/http.(*http2responseWriter).SetReadDeadline 373 -> 357 (-4.29%) net/http.checkIfRange 701 -> 690 (-1.57%) net/http.(*http2SettingsFrame).Value 325 -> 298 (-8.31%) net/http.(*http2SettingsFrame).HasDuplicates 777 -> 767 (-1.29%) net/http.(*Server).Serve 1746 -> 1739 (-0.40%) net/http.http2traceGotConn 569 -> 556 (-2.28%) net/http/pprof net/http/pprof.collectProfile 242 -> 239 (-1.24%) cmd/compile/internal/coverage cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%) cmd/vendor/golang.org/x/telemetry/internal/upload cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).findWork 4570 -> 4540 (-0.66%) cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).reports 3604 -> 3572 (-0.89%) cmd/compile/internal/coverage [cmd/compile] cmd/compile/internal/coverage.metaHashAndLen 
439 -> 438 (-0.23%) cmd/vendor/golang.org/x/text/language cmd/vendor/golang.org/x/text/language.regionGroupDist 287 -> 284 (-1.05%) cmd/go/internal/vcweb cmd/go/internal/vcweb.(*Server).overview.func1 1045 -> 1041 (-0.38%) cmd/go/internal/vcs cmd/go/internal/vcs.expand 761 -> 741 (-2.63%) cmd/compile/internal/inline/inlheur slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%) cmd/compile/internal/inline/inlheur [cmd/compile] slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%) cmd/go/internal/modfetch/codehost cmd/go/internal/modfetch/codehost.bzrParseStat 2217 -> 2213 (-0.18%) cmd/link/internal/ld cmd/link/internal/ld.decodetypeStructFieldCount 157 -> 152 (-3.18%) cmd/link/internal/ld.(*Link).address 12559 -> 12495 (-0.51%) cmd/link/internal/ld.(*dodataState).allocateDataSections 18345 -> 18205 (-0.76%) cmd/link/internal/ld.elfshreloc 618 -> 616 (-0.32%) cmd/link/internal/ld.(*deadcodePass).decodetypeMethods 794 -> 779 (-1.89%) cmd/link/internal/ld.(*dodataState).assignDsymsToSection 668 -> 663 (-0.75%) cmd/link/internal/ld.relocSectFn 285 -> 284 (-0.35%) cmd/link/internal/ld.decodetypeIfaceMethodCount 146 -> 144 (-1.37%) cmd/link/internal/ld.decodetypeArrayLen 157 -> 152 (-3.18%) cmd/link/internal/arm64 cmd/link/internal/arm64.gensymlate.func1 895 -> 888 (-0.78%) cmd/go/internal/modload cmd/go/internal/modload.queryProxy.func3 1029 -> 1012 (-1.65%) cmd/go/internal/load cmd/go/internal/load.(*Package).setBuildInfo 8453 -> 8447 (-0.07%) cmd/go/internal/clean cmd/go/internal/clean.runClean 2120 -> 2104 (-0.75%) cmd/compile/internal/ssa cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%) cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%) cmd/compile/internal/ssa.(*debugState).buildLocationLists 3326 -> 3294 (-0.96%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.(*debugState).processValue 9756 -> 9724 (-0.33%) 
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%) cmd/compile/internal/ssa [cmd/compile] cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%) cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%) cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%) cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%) file before after Δ % math/bits.s 2352 2354 +2 +0.085% math/bits [cmd/compile].s 2352 2354 +2 +0.085% math.s 35675 35674 -1 -0.003% math [cmd/compile].s 35675 35674 -1 -0.003% runtime.s 577251 577245 -6 -0.001% runtime [cmd/compile].s 642419 642438 +19 +0.003% sort.s 37434 37435 +1 +0.003% strconv.s 48391 48343 -48 -0.099% sort [cmd/compile].s 37434 37435 +1 +0.003% bufio.s 21386 21418 +32 +0.150% strconv [cmd/compile].s 48391 48343 -48 -0.099% image.s 34978 35022 +44 +0.126% regexp/syntax.s 81719 81781 +62 +0.076% time.s 94341 94184 -157 -0.166% regexp.s 60411 60399 -12 -0.020% bufio [cmd/compile].s 21512 21544 +32 +0.149% encoding/binary.s 34062 34087 +25 +0.073% regexp/syntax [cmd/compile].s 81719 81781 +62 +0.076% encoding/base64.s 11907 11903 -4 -0.034% time [cmd/compile].s 94341 94184 -157 -0.166% index/suffixarray.s 41633 41527 -106 -0.255% os.s 101770 101738 -32 -0.031% regexp [cmd/compile].s 60411 60399 -12 -0.020% encoding/binary [cmd/compile].s 37173 37198 +25 +0.067% encoding/base64 [cmd/compile].s 11907 11903 -4 -0.034% os/exec.s 23900 23907 +7 +0.029% encoding/hex.s 6038 6030 -8 -0.132% crypto/des.s 5073 5056 -17 -0.335% os [cmd/compile].s 102030 101998 -32 -0.031% vendor/golang.org/x/net/http2/hpack.s 22027 22033 +6 +0.027% math/big.s 164808 164753 -55 -0.033% cmd/vendor/golang.org/x/sys/unix.s 121450 121444 -6 -0.005% encoding/json.s 110294 110287 -7 -0.006% testing.s 115303 115281 -22 
-0.019% archive/zip.s 65329 65325 -4 -0.006% os/user.s 10078 10080 +2 +0.020% encoding/gob.s 143788 143783 -5 -0.003% crypto/elliptic.s 30686 30704 +18 +0.059% go/doc/comment.s 49401 49433 +32 +0.065% debug/buildinfo.s 9095 9085 -10 -0.110% image/png.s 36113 36081 -32 -0.089% archive/tar.s 71994 71897 -97 -0.135% crypto/internal/cryptotest.s 60872 60849 -23 -0.038% internal/pkgbits.s 20441 20429 -12 -0.059% testing/quick.s 8236 8235 -1 -0.012% log/slog.s 77568 77558 -10 -0.013% internal/trace/internal/oldtrace.s 52885 52896 +11 +0.021% runtime/pprof.s 123978 123969 -9 -0.007% internal/coverage/cfile.s 25198 25184 -14 -0.056% cmd/internal/objabi.s 19954 19946 -8 -0.040% crypto/ecdsa.s 29159 29141 -18 -0.062% log/slog/internal/benchmarks.s 6694 6695 +1 +0.015% net.s 299569 299503 -66 -0.022% os/exec [cmd/compile].s 23888 23895 +7 +0.029% internal/trace.s 179226 179240 +14 +0.008% internal/fuzz.s 86190 86191 +1 +0.001% crypto/x509.s 177195 177164 -31 -0.017% cmd/internal/obj/s390x.s 121642 121610 -32 -0.026% cmd/internal/obj/ppc64.s 140118 140122 +4 +0.003% encoding/hex [cmd/compile].s 6149 6141 -8 -0.130% cmd/internal/objabi [cmd/compile].s 19954 19946 -8 -0.040% cmd/internal/obj/arm64.s 158523 158555 +32 +0.020% go/doc/comment [cmd/compile].s 49512 49544 +32 +0.065% math/big [cmd/compile].s 166394 166339 -55 -0.033% encoding/json [cmd/compile].s 110712 110705 -7 -0.006% cmd/covdata.s 39699 39687 -12 -0.030% runtime/pprof [cmd/compile].s 125209 125200 -9 -0.007% cmd/compile/internal/syntax.s 181755 181736 -19 -0.010% cmd/dist.s 177893 177861 -32 -0.018% crypto/tls.s 389157 389113 -44 -0.011% internal/pkgbits [cmd/compile].s 41644 41632 -12 -0.029% cmd/compile/internal/syntax [cmd/compile].s 196105 196086 -19 -0.010% cmd/compile/internal/types.s 71315 71345 +30 +0.042% cmd/internal/obj/s390x [cmd/compile].s 121733 121701 -32 -0.026% cmd/go/internal/trace.s 4796 4760 -36 -0.751% cmd/internal/obj/arm64 [cmd/compile].s 168120 168147 +27 +0.016% cmd/internal/obj/ppc64 
[cmd/compile].s 140219 140223 +4 +0.003%
cmd/internal/script.s 83442 83436 -6 -0.007%
cmd/link/internal/loader.s 93299 93294 -5 -0.005%
net/http.s 620639 620472 -167 -0.027%
net/http/pprof.s 35016 35013 -3 -0.009%
cmd/compile/internal/coverage.s 6668 6667 -1 -0.015%
cmd/vendor/golang.org/x/telemetry/internal/upload.s 34210 34148 -62 -0.181%
cmd/compile/internal/coverage [cmd/compile].s 6664 6663 -1 -0.015%
cmd/vendor/golang.org/x/text/language.s 48077 48074 -3 -0.006%
cmd/go/internal/vcweb.s 45193 45189 -4 -0.009%
cmd/go/internal/vcs.s 44749 44729 -20 -0.045%
cmd/compile/internal/inline/inlheur.s 83758 83742 -16 -0.019%
cmd/compile/internal/inline/inlheur [cmd/compile].s 84773 84757 -16 -0.019%
cmd/go/internal/modfetch/codehost.s 89098 89094 -4 -0.004%
cmd/trace.s 257550 257564 +14 +0.005%
cmd/link/internal/ld.s 641945 641706 -239 -0.037%
cmd/link/internal/arm64.s 34805 34798 -7 -0.020%
cmd/go/internal/modload.s 328971 328954 -17 -0.005%
cmd/go/internal/load.s 178877 178871 -6 -0.003%
cmd/go/internal/clean.s 11006 10990 -16 -0.145%
cmd/compile/internal/ssa.s 3552843 3553347 +504 +0.014%
cmd/compile/internal/ssa [cmd/compile].s 3752511 3753123 +612 +0.016%
total 36179015 36178687 -328 -0.001%

Change-Id: I251c2898ccf3c9931d162d87dabbd49cf4ec73a5
Reviewed-on: https://go-review.googlesource.com/c/go/+/641757
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
a8e532b0f2
commit
cd595be6d6
@ -664,9 +664,11 @@
|
||||
// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
|
||||
// and further combining shifts.
|
||||
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
|
||||
(BT(Q|L)const [c] (ADDQ x x)) && c>1 => (BT(Q|L)const [c-1] x)
|
||||
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
|
||||
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
|
||||
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
|
||||
(BTLconst [c] (ADDL x x)) && c>1 => (BTLconst [c-1] x)
|
||||
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
|
||||
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
|
||||
|
||||
@ -702,11 +704,11 @@
|
||||
// We thus special-case them, by detecting the shift patterns.
|
||||
|
||||
// Special case resetting first/last bit
|
||||
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
|
||||
(ADD(L|Q) (SHR(L|Q)const [1] x) (SHR(L|Q)const [1] x))
|
||||
=> (AND(L|Q)const [-2] x)
|
||||
(SHRLconst [1] (SHLLconst [1] x))
|
||||
(SHRLconst [1] (ADDL x x))
|
||||
=> (ANDLconst [0x7fffffff] x)
|
||||
(SHRQconst [1] (SHLQconst [1] x))
|
||||
(SHRQconst [1] (ADDQ x x))
|
||||
=> (BTRQconst [63] x)
|
||||
|
||||
// Special case testing first/last bit (with double-shift generated by generic.rules)
|
||||
@ -933,17 +935,19 @@
|
||||
(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 <v.Type> x x))
|
||||
(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 <v.Type> x x))
|
||||
|
||||
// Prefer addition when shifting left by one
|
||||
(SHL(Q|L)const [1] x) => (ADD(Q|L) x x)
|
||||
|
||||
// combine add/shift into LEAQ/LEAL
|
||||
(ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y)
|
||||
(ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y)
|
||||
(ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y)
|
||||
(ADD(L|Q) x (ADD(L|Q) y y)) => (LEA(L|Q)2 x y)
|
||||
(ADD(L|Q) x (ADD(L|Q) x y)) => (LEA(L|Q)2 y x)
|
||||
|
||||
// combine ADDQ/ADDQconst into LEAQ1/LEAL1
|
||||
(ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y)
|
||||
(ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y)
|
||||
(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x)
|
||||
(ADD(Q|L)const [c] (ADD(Q|L) x x)) => (LEA(Q|L)1 [c] x x)
|
||||
|
||||
// fold ADDQ/ADDL into LEAQ/LEAL
|
||||
(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
|
||||
@ -965,12 +969,18 @@
|
||||
(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y)
|
||||
|
||||
// fold shifts into LEAQx/LEALx
|
||||
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y)
|
||||
(LEA(Q|L)1 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)2 [c] {s} x y)
|
||||
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y)
|
||||
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y)
|
||||
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y)
|
||||
(LEA(Q|L)2 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)4 [c] {s} x y)
|
||||
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y)
|
||||
(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y)
|
||||
(LEA(Q|L)4 [c] {s} x (ADD(Q|L) y y)) => (LEA(Q|L)8 [c] {s} x y)
|
||||
|
||||
// (x + x) << 1 -> x << 2
|
||||
(LEA(Q|L)2 [0] {s} (ADD(Q|L) x x) x) && s == nil => (SHL(Q|L)const [2] x)
|
||||
|
||||
// (x + x) << 2 -> x << 3 and similar
|
||||
(SHL(Q|L)const [c] (ADD(Q|L) x x)) => (SHL(Q|L)const [c+1] x)
|
||||
|
||||
// reverse ordering of compare instruction
|
||||
(SETL (InvertFlags x)) => (SETG x)
|
||||
|
@ -1261,6 +1261,21 @@ func rewriteValueAMD64_OpAMD64ADCQconst(v *Value) bool {
|
||||
func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (ADDL (SHRLconst [1] x) (SHRLconst [1] x))
|
||||
// result: (ANDLconst [-2] x)
|
||||
for {
|
||||
if v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
if v_1.Op != OpAMD64SHRLconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64ANDLconst)
|
||||
v.AuxInt = int32ToAuxInt(-2)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (ADDL x (MOVLconst [c]))
|
||||
// result: (ADDLconst [c] x)
|
||||
for {
|
||||
@ -1307,21 +1322,6 @@ func rewriteValueAMD64_OpAMD64ADDL(v *Value) bool {
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (ADDL x (SHLLconst [1] y))
|
||||
// result: (LEAL2 x y)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
continue
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAL2)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (ADDL x (ADDL y y))
|
||||
// result: (LEAL2 x y)
|
||||
for {
|
||||
@ -1461,14 +1461,17 @@ func rewriteValueAMD64_OpAMD64ADDLconst(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (ADDLconst [c] (SHLLconst [1] x))
|
||||
// match: (ADDLconst [c] (ADDL x x))
|
||||
// result: (LEAL1 [c] x x)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
if v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
if v_0.Op != OpAMD64ADDL {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.reset(OpAMD64LEAL1)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.AddArg2(x, x)
|
||||
@ -1806,6 +1809,21 @@ func rewriteValueAMD64_OpAMD64ADDLmodify(v *Value) bool {
|
||||
func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (ADDQ (SHRQconst [1] x) (SHRQconst [1] x))
|
||||
// result: (ANDQconst [-2] x)
|
||||
for {
|
||||
if v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
if v_1.Op != OpAMD64SHRQconst || auxIntToInt8(v_1.AuxInt) != 1 || x != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64ANDQconst)
|
||||
v.AuxInt = int32ToAuxInt(-2)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (ADDQ x (MOVQconst <t> [c]))
|
||||
// cond: is32Bit(c) && !t.IsPtr()
|
||||
// result: (ADDQconst [int32(c)] x)
|
||||
@ -1873,21 +1891,6 @@ func rewriteValueAMD64_OpAMD64ADDQ(v *Value) bool {
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (ADDQ x (SHLQconst [1] y))
|
||||
// result: (LEAQ2 x y)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
continue
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAQ2)
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (ADDQ x (ADDQ y y))
|
||||
// result: (LEAQ2 x y)
|
||||
for {
|
||||
@ -2052,14 +2055,17 @@ func rewriteValueAMD64_OpAMD64ADDQconst(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (ADDQconst [c] (SHLQconst [1] x))
|
||||
// match: (ADDQconst [c] (ADDQ x x))
|
||||
// result: (LEAQ1 [c] x x)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
if v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
if v_0.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.reset(OpAMD64LEAQ1)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.AddArg2(x, x)
|
||||
@ -3637,6 +3643,23 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool {
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (BTLconst [c] (ADDQ x x))
|
||||
// cond: c>1
|
||||
// result: (BTLconst [c-1] x)
|
||||
for {
|
||||
c := auxIntToInt8(v.AuxInt)
|
||||
if v_0.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] || !(c > 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64BTLconst)
|
||||
v.AuxInt = int8ToAuxInt(c - 1)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (BTLconst [c] (SHLQconst [d] x))
|
||||
// cond: c>d
|
||||
// result: (BTLconst [c-d] x)
|
||||
@ -3689,6 +3712,23 @@ func rewriteValueAMD64_OpAMD64BTLconst(v *Value) bool {
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (BTLconst [c] (ADDL x x))
|
||||
// cond: c>1
|
||||
// result: (BTLconst [c-1] x)
|
||||
for {
|
||||
c := auxIntToInt8(v.AuxInt)
|
||||
if v_0.Op != OpAMD64ADDL {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] || !(c > 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64BTLconst)
|
||||
v.AuxInt = int8ToAuxInt(c - 1)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (BTLconst [c] (SHLLconst [d] x))
|
||||
// cond: c>d
|
||||
// result: (BTLconst [c-d] x)
|
||||
@ -3761,6 +3801,23 @@ func rewriteValueAMD64_OpAMD64BTQconst(v *Value) bool {
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (BTQconst [c] (ADDQ x x))
|
||||
// cond: c>1
|
||||
// result: (BTQconst [c-1] x)
|
||||
for {
|
||||
c := auxIntToInt8(v.AuxInt)
|
||||
if v_0.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] || !(c > 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64BTQconst)
|
||||
v.AuxInt = int8ToAuxInt(c - 1)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (BTQconst [c] (SHLQconst [d] x))
|
||||
// cond: c>d
|
||||
// result: (BTQconst [c-d] x)
|
||||
@ -8287,17 +8344,20 @@ func rewriteValueAMD64_OpAMD64LEAL1(v *Value) bool {
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (LEAL1 [c] {s} x (SHLLconst [1] y))
|
||||
// match: (LEAL1 [c] {s} x (ADDL y y))
|
||||
// result: (LEAL2 [c] {s} x y)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
s := auxToSym(v.Aux)
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
if v_1.Op != OpAMD64ADDL {
|
||||
continue
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
if y != v_1.Args[0] {
|
||||
continue
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAL2)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.Aux = symToAux(s)
|
||||
@ -8391,16 +8451,19 @@ func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (LEAL2 [c] {s} x (SHLLconst [1] y))
|
||||
// match: (LEAL2 [c] {s} x (ADDL y y))
|
||||
// result: (LEAL4 [c] {s} x y)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
s := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
if v_1.Op != OpAMD64ADDL {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
if y != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAL4)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.Aux = symToAux(s)
|
||||
@ -8423,6 +8486,26 @@ func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (LEAL2 [0] {s} (ADDL x x) x)
|
||||
// cond: s == nil
|
||||
// result: (SHLLconst [2] x)
|
||||
for {
|
||||
if auxIntToInt32(v.AuxInt) != 0 {
|
||||
break
|
||||
}
|
||||
s := auxToSym(v.Aux)
|
||||
if v_0.Op != OpAMD64ADDL {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] || x != v_1 || !(s == nil) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLLconst)
|
||||
v.AuxInt = int8ToAuxInt(2)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool {
|
||||
@ -8470,16 +8553,19 @@ func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (LEAL4 [c] {s} x (SHLLconst [1] y))
|
||||
// match: (LEAL4 [c] {s} x (ADDL y y))
|
||||
// result: (LEAL8 [c] {s} x y)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
s := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
if v_1.Op != OpAMD64ADDL {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
if y != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAL8)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.Aux = symToAux(s)
|
||||
@ -8721,17 +8807,20 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool {
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
|
||||
// match: (LEAQ1 [c] {s} x (ADDQ y y))
|
||||
// result: (LEAQ2 [c] {s} x y)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
s := auxToSym(v.Aux)
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
if v_1.Op != OpAMD64ADDQ {
|
||||
continue
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
if y != v_1.Args[0] {
|
||||
continue
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAQ2)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.Aux = symToAux(s)
|
||||
@ -8924,16 +9013,19 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
|
||||
// match: (LEAQ2 [c] {s} x (ADDQ y y))
|
||||
// result: (LEAQ4 [c] {s} x y)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
s := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
if v_1.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
if y != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAQ4)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.Aux = symToAux(s)
|
||||
@ -8956,6 +9048,26 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (LEAQ2 [0] {s} (ADDQ x x) x)
|
||||
// cond: s == nil
|
||||
// result: (SHLQconst [2] x)
|
||||
for {
|
||||
if auxIntToInt32(v.AuxInt) != 0 {
|
||||
break
|
||||
}
|
||||
s := auxToSym(v.Aux)
|
||||
if v_0.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] || x != v_1 || !(s == nil) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLQconst)
|
||||
v.AuxInt = int8ToAuxInt(2)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB
|
||||
// result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
|
||||
@ -9087,16 +9199,19 @@ func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool {
|
||||
v.AddArg2(x, y)
|
||||
return true
|
||||
}
|
||||
// match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
|
||||
// match: (LEAQ4 [c] {s} x (ADDQ y y))
|
||||
// result: (LEAQ8 [c] {s} x y)
|
||||
for {
|
||||
c := auxIntToInt32(v.AuxInt)
|
||||
s := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 1 {
|
||||
if v_1.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
if y != v_1.Args[0] {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
v.reset(OpAMD64LEAQ8)
|
||||
v.AuxInt = int32ToAuxInt(c)
|
||||
v.Aux = symToAux(s)
|
||||
@ -20736,18 +20851,6 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (SHLLconst [1] (SHRLconst [1] x))
|
||||
// result: (ANDLconst [-2] x)
|
||||
for {
|
||||
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRLconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.reset(OpAMD64ANDLconst)
|
||||
v.AuxInt = int32ToAuxInt(-2)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLLconst x [0])
|
||||
// result: x
|
||||
for {
|
||||
@ -20758,6 +20861,33 @@ func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLLconst [1] x)
|
||||
// result: (ADDL x x)
|
||||
for {
|
||||
if auxIntToInt8(v.AuxInt) != 1 {
|
||||
break
|
||||
}
|
||||
x := v_0
|
||||
v.reset(OpAMD64ADDL)
|
||||
v.AddArg2(x, x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLLconst [c] (ADDL x x))
|
||||
// result: (SHLLconst [c+1] x)
|
||||
for {
|
||||
c := auxIntToInt8(v.AuxInt)
|
||||
if v_0.Op != OpAMD64ADDL {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLLconst)
|
||||
v.AuxInt = int8ToAuxInt(c + 1)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLLconst [d] (MOVLconst [c]))
|
||||
// result: (MOVLconst [c << uint64(d)])
|
||||
for {
|
||||
@ -20992,18 +21122,6 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (SHLQconst [1] (SHRQconst [1] x))
|
||||
// result: (ANDQconst [-2] x)
|
||||
for {
|
||||
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHRQconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.reset(OpAMD64ANDQconst)
|
||||
v.AuxInt = int32ToAuxInt(-2)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLQconst x [0])
|
||||
// result: x
|
||||
for {
|
||||
@ -21014,6 +21132,33 @@ func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool {
|
||||
v.copyOf(x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLQconst [1] x)
|
||||
// result: (ADDQ x x)
|
||||
for {
|
||||
if auxIntToInt8(v.AuxInt) != 1 {
|
||||
break
|
||||
}
|
||||
x := v_0
|
||||
v.reset(OpAMD64ADDQ)
|
||||
v.AddArg2(x, x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLQconst [c] (ADDQ x x))
|
||||
// result: (SHLQconst [c+1] x)
|
||||
for {
|
||||
c := auxIntToInt8(v.AuxInt)
|
||||
if v_0.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLQconst)
|
||||
v.AuxInt = int8ToAuxInt(c + 1)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (SHLQconst [d] (MOVQconst [c]))
|
||||
// result: (MOVQconst [c << uint64(d)])
|
||||
for {
|
||||
@ -21419,13 +21564,16 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (SHRLconst [1] (SHLLconst [1] x))
|
||||
// match: (SHRLconst [1] (ADDL x x))
|
||||
// result: (ANDLconst [0x7fffffff] x)
|
||||
for {
|
||||
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLLconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDL {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.reset(OpAMD64ANDLconst)
|
||||
v.AuxInt = int32ToAuxInt(0x7fffffff)
|
||||
v.AddArg(x)
|
||||
@ -21663,13 +21811,16 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (SHRQconst [1] (SHLQconst [1] x))
|
||||
// match: (SHRQconst [1] (ADDQ x x))
|
||||
// result: (BTRQconst [63] x)
|
||||
for {
|
||||
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64SHLQconst || auxIntToInt8(v_0.AuxInt) != 1 {
|
||||
if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDQ {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[1]
|
||||
if x != v_0.Args[0] {
|
||||
break
|
||||
}
|
||||
x := v_0.Args[0]
|
||||
v.reset(OpAMD64BTRQconst)
|
||||
v.AuxInt = int8ToAuxInt(63)
|
||||
v.AddArg(x)
|
||||
|
@ -185,6 +185,15 @@ func Pow2Muls(n1, n2 int) (int, int) {
|
||||
return a, b
|
||||
}
|
||||
|
||||
// Mul_2 is a codegen check for multiplication by the constant 2.
// It doubles a 32-bit and a 64-bit operand and returns both products.
// The directive above each statement expects amd64 to emit an add
// (ADDL / ADDQ) for that line and, via the leading '-', to not emit a
// left shift (SHLL / SHLQ).
func Mul_2(n1 int32, n2 int64) (int32, int64) {
|
||||
// amd64:"ADDL", -"SHLL"
|
||||
a := n1 * 2
|
||||
// amd64:"ADDQ", -"SHLQ"
|
||||
b := n2 * 2
|
||||
|
||||
return a, b
|
||||
}
|
||||
|
||||
func Mul_96(n int) int {
|
||||
// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
|
||||
// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
|
||||
|
@ -120,6 +120,16 @@ func bitoff64(a, b uint64) (n uint64) {
|
||||
return n
|
||||
}
|
||||
|
||||
// clearLastBit is a codegen check for the "shift right by 1, then shift
// left by 1" idiom, which zeroes the lowest bit of its operand.
// It applies the idiom to a 64-bit and a 32-bit value and returns both
// results. The directives expect amd64 to lower each line to a single
// AND with the immediate -2 (ANDQ / ANDL).
func clearLastBit(x int64, y int32) (int64, int32) {
|
||||
// amd64:"ANDQ\t[$]-2"
|
||||
a := (x >> 1) << 1
|
||||
|
||||
// amd64:"ANDL\t[$]-2"
|
||||
b := (y >> 1) << 1
|
||||
|
||||
return a, b
|
||||
}
|
||||
|
||||
func bitcompl64(a, b uint64) (n uint64) {
|
||||
// amd64:"BTCQ"
|
||||
n += b ^ (1 << (a & 63))
|
||||
|
@ -58,6 +58,16 @@ func rshConst64x64Overflow8(v int8) int64 {
|
||||
return int64(v) >> 8
|
||||
}
|
||||
|
||||
// lshConst32x1 is a codegen check for a 32-bit left shift by the
// constant 1. The directive expects amd64 to emit ADDL for the return
// line and, via the leading '-', to not emit SHLL.
func lshConst32x1(v int32) int32 {
|
||||
// amd64:"ADDL", -"SHLL"
|
||||
return v << 1
|
||||
}
|
||||
|
||||
// lshConst64x1 is a codegen check for a 64-bit left shift by the
// constant 1. The directive expects amd64 to emit ADDQ for the return
// line and, via the leading '-', to not emit SHLQ.
func lshConst64x1(v int64) int64 {
|
||||
// amd64:"ADDQ", -"SHLQ"
|
||||
return v << 1
|
||||
}
|
||||
|
||||
func lshConst32x64(v int32) int32 {
|
||||
// ppc64x:"SLW"
|
||||
// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
|
||||
@ -94,6 +104,26 @@ func rshConst64x32(v int64) int64 {
|
||||
return v >> uint32(33)
|
||||
}
|
||||
|
||||
// lshConst32x1Add is a codegen check for a 32-bit self-addition followed
// by a left shift by 1. (x + x) << 1 equals x << 2, and the directive
// expects amd64 to emit a SHLL with the immediate 2 for the return line.
func lshConst32x1Add(x int32) int32 {
|
||||
// amd64:"SHLL\t[$]2"
|
||||
return (x + x) << 1
|
||||
}
|
||||
|
||||
// lshConst64x1Add is a codegen check for a 64-bit self-addition followed
// by a left shift by 1. (x + x) << 1 equals x << 2, and the directive
// expects amd64 to emit a SHLQ with the immediate 2 for the return line.
func lshConst64x1Add(x int64) int64 {
|
||||
// amd64:"SHLQ\t[$]2"
|
||||
return (x + x) << 1
|
||||
}
|
||||
|
||||
// lshConst32x2Add is a codegen check for a 32-bit self-addition followed
// by a left shift by 2. (x + x) << 2 equals x << 3, and the directive
// expects amd64 to emit a SHLL with the immediate 3 for the return line.
func lshConst32x2Add(x int32) int32 {
|
||||
// amd64:"SHLL\t[$]3"
|
||||
return (x + x) << 2
|
||||
}
|
||||
|
||||
// lshConst64x2Add is a codegen check for a 64-bit self-addition followed
// by a left shift by 2. (x + x) << 2 equals x << 3, and the directive
// expects amd64 to emit a SHLQ with the immediate 3 for the return line.
func lshConst64x2Add(x int64) int64 {
|
||||
// amd64:"SHLQ\t[$]3"
|
||||
return (x + x) << 2
|
||||
}
|
||||
|
||||
// ------------------ //
|
||||
// masked shifts //
|
||||
// ------------------ //
|
||||
|
Loading…
x
Reference in New Issue
Block a user