Merge "Merge branch 'dev.ssa' into mergebranch"

This commit is contained in:
Gerrit Code Review 2016-03-01 21:36:45 +00:00
commit a6fb2aede7
148 changed files with 69628 additions and 207 deletions

View File

@ -127,6 +127,9 @@ var amd64OperandTests = []operandTest{
{"(SI)(BX*1)", "(SI)(BX*1)"},
{"(SI)(DX*1)", "(SI)(DX*1)"},
{"(SP)", "(SP)"},
{"(SP)(AX*4)", "(SP)(AX*4)"},
{"32(SP)(BX*2)", "32(SP)(BX*2)"},
{"32323(SP)(R8*4)", "32323(SP)(R8*4)"},
{"+3(PC)", "3(PC)"},
{"-1(DI)(BX*1)", "-1(DI)(BX*1)"},
{"-3(PC)", "-3(PC)"},

View File

@ -127,5 +127,19 @@ loop:
MOVNTDQ X1, (AX) // MOVNTO X1, (AX)
MOVOA (AX), X1 // MOVO (AX), X1
// Tests for SP indexed addresses.
MOVQ foo(SP)(AX*1), BX // 488b1c04
MOVQ foo+32(SP)(CX*2), DX // 488b544c20
MOVQ foo+32323(SP)(R8*4), R9 // 4e8b8c84437e0000
MOVL foo(SP)(SI*8), DI // 8b3cf4
MOVL foo+32(SP)(R10*1), R11 // 468b5c1420
MOVL foo+32323(SP)(R12*2), R13 // 468bac64437e0000
MOVW foo(SP)(AX*4), R8 // 66448b0484
MOVW foo+32(SP)(R9*8), CX // 66428b4ccc20
MOVW foo+32323(SP)(AX*1), DX // 668b9404437e0000
MOVB foo(SP)(AX*2), AL // 8a0444
MOVB foo+32(SP)(CX*4), AH // 8a648c20
MOVB foo+32323(SP)(CX*8), R9 // 448a8ccc437e0000
// LTYPE0 nonnon { outcode($1, &$2); }
RET // c3

View File

@ -3,5 +3,6 @@
// license that can be found in the LICENSE file.
TEXT errors(SB),$0
MOVL foo<>(SB)(AX), AX // ERROR "invalid instruction"
MOVL foo<>(SB)(AX), AX // ERROR "invalid instruction"
MOVL (AX)(SP*1), AX // ERROR "invalid instruction"
RET

View File

@ -458,6 +458,7 @@ func (p *Package) writeDefsFunc(fgo2 io.Writer, n *Name) {
}
fmt.Fprint(fgo2, "\n")
fmt.Fprint(fgo2, "//go:cgo_unsafe_args\n")
conf.Fprint(fgo2, fset, d)
fmt.Fprint(fgo2, " {\n")

View File

@ -117,6 +117,7 @@ var progtable = [x86.ALAST]obj.ProgInfo{
x86.AJPL: {Flags: gc.Cjmp | gc.UseCarry},
x86.AJPS: {Flags: gc.Cjmp | gc.UseCarry},
obj.AJMP: {Flags: gc.Jump | gc.Break | gc.KillCarry},
x86.ALEAW: {Flags: gc.LeftAddr | gc.RightWrite},
x86.ALEAL: {Flags: gc.LeftAddr | gc.RightWrite},
x86.ALEAQ: {Flags: gc.LeftAddr | gc.RightWrite},
x86.AMOVBLSX: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.Conv},
@ -167,6 +168,7 @@ var progtable = [x86.ALAST]obj.ProgInfo{
x86.AORW: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
x86.APOPQ: {Flags: gc.SizeQ | gc.RightWrite},
x86.APUSHQ: {Flags: gc.SizeQ | gc.LeftRead},
x86.APXOR: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
x86.ARCLB: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry | gc.UseCarry},
x86.ARCLL: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry | gc.UseCarry},
x86.ARCLQ: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry | gc.UseCarry},

View File

@ -588,6 +588,7 @@ func makepartialcall(fn *Node, t0 *Type, meth *Node) *Node {
ptr.Ullman = 1
ptr.Used = true
ptr.Name.Curfn = xfunc
ptr.Xoffset = 0
xfunc.Func.Dcl = append(xfunc.Func.Dcl, ptr)
var body []*Node
if Isptr[rcvrtype.Etype] || Isinter(rcvrtype) {

View File

@ -403,6 +403,7 @@ var etnames = []string{
TFORW: "FORW",
TFIELD: "FIELD",
TSTRING: "STRING",
TUNSAFEPTR: "TUNSAFEPTR",
TANY: "ANY",
}

View File

@ -142,6 +142,8 @@ func newlab(n *Node) *Label {
return lab
}
// There is a copy of checkgoto in the new SSA backend.
// Please keep them in sync.
func checkgoto(from *Node, to *Node) {
if from.Sym == to.Sym {
return
@ -840,7 +842,7 @@ func gen(n *Node) {
cgen_dcl(n.Left)
case OAS:
if gen_as_init(n) {
if gen_as_init(n, false) {
break
}
Cgen_as(n.Left, n.Right)

View File

@ -131,7 +131,7 @@ type Type struct {
Note *string // literal string annotation
// TARRAY
Bound int64 // negative is dynamic array
Bound int64 // negative is slice
// TMAP
Bucket *Type // internal type representing a hash bucket
@ -759,4 +759,13 @@ var Panicindex *Node
var panicslice *Node
var panicdivide *Node
var throwreturn *Node
var growslice *Node
var writebarrierptr *Node
var typedmemmove *Node
var panicdottype *Node

View File

@ -530,6 +530,16 @@ func newplist() *obj.Plist {
return pl
}
// nodarg does something that depends on the value of
// fp (this was previously completely undocumented).
//
// fp=1 corresponds to input args
// fp=0 corresponds to output args
// fp=-1 is a special case of output args for a
// specific call from walk that previously (and
// incorrectly) passed a 1; the behavior is exactly
// the same as it is for 1, except that PARAMOUT is
// generated instead of PARAM.
func nodarg(t *Type, fp int) *Node {
var n *Node
@ -555,7 +565,7 @@ func nodarg(t *Type, fp int) *Node {
Fatalf("nodarg: not field %v", t)
}
if fp == 1 {
if fp == 1 || fp == -1 {
for _, n := range Curfn.Func.Dcl {
if (n.Class == PPARAM || n.Class == PPARAMOUT) && !isblanksym(t.Sym) && n.Sym == t.Sym {
return n
@ -592,6 +602,9 @@ fp:
case 1: // input arg
n.Class = PPARAM
case -1: // output arg from paramstoheap
n.Class = PPARAMOUT
case 2: // offset output arg
Fatalf("shouldn't be used")
}

View File

@ -33,10 +33,10 @@ func renameinit() *Sym {
// hand-craft the following initialization code
// var initdone· uint8 (1)
// func init() (2)
// if initdone· != 0 { (3)
// if initdone· == 2 (4)
// return
// throw(); (5)
// if initdone· > 1 { (3)
// return (3a)
// if initdone· == 1 { (4)
// throw(); (4a)
// }
// initdone· = 1; (6)
// // over all matching imported symbols
@ -118,22 +118,21 @@ func fninit(n *NodeList) {
// (3)
a := Nod(OIF, nil, nil)
a.Left = Nod(ONE, gatevar, Nodintconst(0))
a.Left = Nod(OGT, gatevar, Nodintconst(1))
a.Likely = 1
r = append(r, a)
// (3a)
a.Nbody.Set([]*Node{Nod(ORETURN, nil, nil)})
// (4)
b := Nod(OIF, nil, nil)
b.Left = Nod(OEQ, gatevar, Nodintconst(2))
b.Nbody.Set([]*Node{Nod(ORETURN, nil, nil)})
a.Nbody.Set([]*Node{b})
// (5)
b = syslook("throwinit", 0)
b = Nod(OCALL, b, nil)
a.Nbody.Append(b)
b.Left = Nod(OEQ, gatevar, Nodintconst(1))
// this actually isn't likely, but code layout is better
// like this: no JMP needed after the call.
b.Likely = 1
r = append(r, b)
// (4a)
b.Nbody.Set([]*Node{Nod(OCALL, syslook("throwinit", 0), nil)})
// (6)
a = Nod(OAS, gatevar, Nodintconst(1))

View File

@ -7,6 +7,7 @@
package gc
import (
"cmd/compile/internal/ssa"
"cmd/internal/obj"
"flag"
"fmt"
@ -286,6 +287,23 @@ func Main() {
}
}
}
// special case for ssa for now
if strings.HasPrefix(name, "ssa/") {
// expect form ssa/phase/flag
// e.g. -d=ssa/generic_cse/time
// _ in phase name also matches space
phase := name[4:]
flag := "debug" // default flag is debug
if i := strings.Index(phase, "/"); i >= 0 {
flag = phase[i+1:]
phase = phase[:i]
}
err := ssa.PhaseOption(phase, flag, val)
if err != "" {
log.Fatalf(err)
}
continue Split
}
log.Fatalf("unknown debug key -d %s\n", name)
}
}
@ -844,7 +862,7 @@ func plan9quote(s string) string {
return s
}
type Pragma uint8
type Pragma uint16
const (
Nointerface Pragma = 1 << iota
@ -855,6 +873,7 @@ const (
Systemstack // func must run on system stack
Nowritebarrier // emit compiler error instead of write barrier
Nowritebarrierrec // error on write barrier in this or recursive callees
CgoUnsafeArgs // treat a pointer to one arg as a pointer to them all
)
type lexer struct {
@ -1677,6 +1696,8 @@ func (l *lexer) getlinepragma() rune {
Yyerror("//go:nowritebarrierrec only allowed in runtime")
}
l.pragma |= Nowritebarrierrec | Nowritebarrier // implies Nowritebarrier
case "go:cgo_unsafe_args":
l.pragma |= CgoUnsafeArgs
}
return c
}

View File

@ -160,5 +160,9 @@ var opnames = []string{
OLROT: "LROT",
ORROTC: "RROTC",
ORETJMP: "RETJMP",
OPS: "OPS",
OPC: "OPC",
OSQRT: "OSQRT",
OGETG: "OGETG",
OEND: "END",
}

View File

@ -230,6 +230,7 @@ func cleantempnopop(mark ordermarker, order *Order, out *[]*Node) {
n := order.temp[i]
if n.Name.Keepalive {
n.Name.Keepalive = false
n.Addrtaken = true // ensure SSA keeps the n variable
kill = Nod(OVARLIVE, n, nil)
typecheck(&kill, Etop)
*out = append(*out, kill)

View File

@ -5,6 +5,7 @@
package gc
import (
"cmd/compile/internal/ssa"
"cmd/internal/obj"
"crypto/md5"
"fmt"
@ -341,7 +342,12 @@ func compile(fn *Node) {
Deferreturn = Sysfunc("deferreturn")
Panicindex = Sysfunc("panicindex")
panicslice = Sysfunc("panicslice")
panicdivide = Sysfunc("panicdivide")
throwreturn = Sysfunc("throwreturn")
growslice = Sysfunc("growslice")
writebarrierptr = Sysfunc("writebarrierptr")
typedmemmove = Sysfunc("typedmemmove")
panicdottype = Sysfunc("panicdottype")
}
lno := setlineno(fn)
@ -358,6 +364,7 @@ func compile(fn *Node) {
var nam *Node
var gcargs *Sym
var gclocals *Sym
var ssafn *ssa.Func
if len(fn.Nbody.Slice()) == 0 {
if pure_go != 0 || strings.HasPrefix(fn.Func.Nname.Sym.Name, "init.") {
Yyerror("missing function body for %q", fn.Func.Nname.Sym.Name)
@ -409,6 +416,11 @@ func compile(fn *Node) {
goto ret
}
// Build an SSA backend function.
if shouldssa(Curfn) {
ssafn = buildssa(Curfn)
}
continpc = nil
breakpc = nil
@ -471,6 +483,14 @@ func compile(fn *Node) {
}
}
if ssafn != nil {
genssa(ssafn, ptxt, gcargs, gclocals)
if Curfn.Func.Endlineno != 0 {
lineno = Curfn.Func.Endlineno
}
ssafn.Free()
return
}
Genslice(Curfn.Func.Enter.Slice())
Genslice(Curfn.Nbody.Slice())
gclean()

View File

@ -19,6 +19,7 @@ import (
"cmd/internal/obj"
"fmt"
"sort"
"strings"
)
const (
@ -410,7 +411,7 @@ func newcfg(firstp *obj.Prog) []*BasicBlock {
bb := newblock(firstp)
cfg = append(cfg, bb)
for p := firstp; p != nil; p = p.Link {
for p := firstp; p != nil && p.As != obj.AEND; p = p.Link {
Thearch.Proginfo(p)
if p.To.Type == obj.TYPE_BRANCH {
if p.To.Val == nil {
@ -438,7 +439,7 @@ func newcfg(firstp *obj.Prog) []*BasicBlock {
// contained instructions until a label is reached. Add edges
// for branches and fall-through instructions.
for _, bb := range cfg {
for p := bb.last; p != nil; p = p.Link {
for p := bb.last; p != nil && p.As != obj.AEND; p = p.Link {
if p.Opt != nil && p != bb.last {
break
}
@ -447,6 +448,8 @@ func newcfg(firstp *obj.Prog) []*BasicBlock {
// Stop before an unreachable RET, to avoid creating
// unreachable control flow nodes.
if p.Link != nil && p.Link.As == obj.ARET && p.Link.Mode == 1 {
// TODO: remove after SSA is done. SSA does not
// generate any unreachable RET instructions.
break
}
@ -1364,7 +1367,7 @@ func livenessepilogue(lv *Liveness) {
}
n = lv.vars[j]
if n.Class != PPARAM {
yyerrorl(int(p.Lineno), "internal error: %v %v recorded as live on entry", Curfn.Func.Nname, Nconv(n, obj.FmtLong))
yyerrorl(int(p.Lineno), "internal error: %v %v recorded as live on entry, p.Pc=%v", Curfn.Func.Nname, Nconv(n, obj.FmtLong), p.Pc)
}
}
}
@ -1389,8 +1392,13 @@ func livenessepilogue(lv *Liveness) {
if msg != nil {
fmt_ = ""
fmt_ += fmt.Sprintf("%v: live at ", p.Line())
if p.As == obj.ACALL && p.To.Node != nil {
fmt_ += fmt.Sprintf("call to %s:", ((p.To.Node).(*Node)).Sym.Name)
if p.As == obj.ACALL && p.To.Sym != nil {
name := p.To.Sym.Name
i := strings.Index(name, ".")
if i >= 0 {
name = name[i+1:]
}
fmt_ += fmt.Sprintf("call to %s:", name)
} else if p.As == obj.ACALL {
fmt_ += "indirect call:"
} else {

View File

@ -13,7 +13,7 @@ import (
//
// For flag_race it modifies the function as follows:
//
// 1. It inserts a call to racefuncenter at the beginning of each function.
// 1. It inserts a call to racefuncenterfp at the beginning of each function.
// 2. It inserts a call to racefuncexit at the end of each function.
// 3. It inserts a call to raceread before each memory read.
// 4. It inserts a call to racewrite before each memory write.
@ -33,7 +33,7 @@ import (
// at best instrumentation would cause infinite recursion.
var omit_pkgs = []string{"runtime/internal/atomic", "runtime/internal/sys", "runtime", "runtime/race", "runtime/msan"}
// Only insert racefuncenter/racefuncexit into the following packages.
// Only insert racefuncenterfp/racefuncexit into the following packages.
// Memory accesses in the packages are either uninteresting or will cause false positives.
var norace_inst_pkgs = []string{"sync", "sync/atomic"}

View File

@ -55,8 +55,7 @@ const (
func makefield(name string, t *Type) *Type {
f := typ(TFIELD)
f.Type = t
f.Sym = new(Sym)
f.Sym.Name = name
f.Sym = nopkg.Lookup(name)
return f
}

View File

@ -1209,6 +1209,7 @@ func getlit(lit *Node) int {
return -1
}
// stataddr sets nam to the static address of n and reports whether it succeeded.
func stataddr(nam *Node, n *Node) bool {
if n == nil {
return false
@ -1376,7 +1377,9 @@ func entry(p *InitPlan) *InitEntry {
return &p.E[len(p.E)-1]
}
func gen_as_init(n *Node) bool {
// gen_as_init attempts to emit static data for n and reports whether it succeeded.
// If reportOnly is true, it does not emit static data and does not modify the AST.
func gen_as_init(n *Node, reportOnly bool) bool {
var nr *Node
var nl *Node
var nam Node
@ -1425,7 +1428,6 @@ func gen_as_init(n *Node) bool {
case OSLICEARR:
if nr.Right.Op == OKEY && nr.Right.Left == nil && nr.Right.Right == nil {
nr = nr.Left
gused(nil) // in case the data is the dest of a goto
nl := nr
if nr == nil || nr.Op != OADDR {
goto no
@ -1440,16 +1442,18 @@ func gen_as_init(n *Node) bool {
goto no
}
nam.Xoffset += int64(Array_array)
gdata(&nam, nl, int(Types[Tptr].Width))
if !reportOnly {
nam.Xoffset += int64(Array_array)
gdata(&nam, nl, int(Types[Tptr].Width))
nam.Xoffset += int64(Array_nel) - int64(Array_array)
var nod1 Node
Nodconst(&nod1, Types[TINT], nr.Type.Bound)
gdata(&nam, &nod1, Widthint)
nam.Xoffset += int64(Array_nel) - int64(Array_array)
var nod1 Node
Nodconst(&nod1, Types[TINT], nr.Type.Bound)
gdata(&nam, &nod1, Widthint)
nam.Xoffset += int64(Array_cap) - int64(Array_nel)
gdata(&nam, &nod1, Widthint)
nam.Xoffset += int64(Array_cap) - int64(Array_nel)
gdata(&nam, &nod1, Widthint)
}
return true
}
@ -1480,13 +1484,19 @@ func gen_as_init(n *Node) bool {
TPTR64,
TFLOAT32,
TFLOAT64:
gdata(&nam, nr, int(nr.Type.Width))
if !reportOnly {
gdata(&nam, nr, int(nr.Type.Width))
}
case TCOMPLEX64, TCOMPLEX128:
gdatacomplex(&nam, nr.Val().U.(*Mpcplx))
if !reportOnly {
gdatacomplex(&nam, nr.Val().U.(*Mpcplx))
}
case TSTRING:
gdatastring(&nam, nr.Val().U.(string))
if !reportOnly {
gdatastring(&nam, nr.Val().U.(string))
}
}
return true

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,99 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc
import (
"bytes"
"internal/testenv"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
)
// TODO: move all these tests elsewhere?
// Perhaps teach test/run.go how to run them with a new action verb.
// runTest compiles and runs the program in testdata/filename via
// "go run", failing t if it cannot be run or misbehaves (see doTest).
func runTest(t *testing.T, filename string) {
	doTest(t, filename, "run")
}

// buildTest only compiles the program in testdata/filename via
// "go build", failing t if compilation fails (see doTest).
func buildTest(t *testing.T, filename string) {
	doTest(t, filename, "build")
}
// doTest invokes "go <kind>" on testdata/filename and fails t when the
// command errors, when anything appears on stdout, or when stderr
// mentions an unimplemented SSA construct. Tests are skipped on
// non-amd64 hosts, where the SSA backend is not exercised yet.
func doTest(t *testing.T, filename string, kind string) {
	if runtime.GOARCH != "amd64" {
		t.Skipf("skipping SSA tests on %s for now", runtime.GOARCH)
	}
	testenv.MustHaveGoBuild(t)

	var outBuf, errBuf bytes.Buffer
	cmd := exec.Command("go", kind, filepath.Join("testdata", filename))
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf

	err := cmd.Run()
	if err != nil {
		t.Fatalf("Failed: %v:\nOut: %s\nStderr: %s\n", err, &outBuf, &errBuf)
	}
	if s := outBuf.String(); s != "" {
		t.Errorf("Stdout = %s\nWant empty", s)
	}
	if s := errBuf.String(); strings.Contains(s, "SSA unimplemented") {
		t.Errorf("Unimplemented message found in stderr:\n%s", s)
	}
}
// TestShortCircuit tests OANDAND and OOROR expressions and short circuiting.
func TestShortCircuit(t *testing.T) { runTest(t, "short_ssa.go") }

// TestBreakContinue tests that continue and break statements do what they say.
func TestBreakContinue(t *testing.T) { runTest(t, "break_ssa.go") }

// TestTypeAssertion tests type assertions.
func TestTypeAssertion(t *testing.T) { runTest(t, "assert_ssa.go") }

// TestArithmetic tests that both backends have the same result for arithmetic expressions.
func TestArithmetic(t *testing.T) { runTest(t, "arith_ssa.go") }

// TestFP tests that both backends have the same result for floating point expressions.
func TestFP(t *testing.T) { runTest(t, "fp_ssa.go") }

// TestArithmeticBoundary tests boundary results for arithmetic operations.
func TestArithmeticBoundary(t *testing.T) { runTest(t, "arithBoundary_ssa.go") }

// TestArithmeticConst tests results for arithmetic operations against constants.
func TestArithmeticConst(t *testing.T) { runTest(t, "arithConst_ssa.go") }

// TestChan tests channel operations.
func TestChan(t *testing.T) { runTest(t, "chan_ssa.go") }

// TestCompound tests compound (multi-word) values.
func TestCompound(t *testing.T) { runTest(t, "compound_ssa.go") }

// TestCtl tests control-flow constructs.
func TestCtl(t *testing.T) { runTest(t, "ctl_ssa.go") }

// NOTE(review): TestFp runs the same testdata file as TestFP above —
// this looks like an accidental duplicate; confirm and remove one.
func TestFp(t *testing.T) { runTest(t, "fp_ssa.go") }

// TestLoadStore tests load/store ordering and merging.
func TestLoadStore(t *testing.T) { runTest(t, "loadstore_ssa.go") }

// TestMap tests map operations.
func TestMap(t *testing.T) { runTest(t, "map_ssa.go") }

// TestRegalloc exercises the register allocator.
func TestRegalloc(t *testing.T) { runTest(t, "regalloc_ssa.go") }

// TestString tests string operations.
func TestString(t *testing.T) { runTest(t, "string_ssa.go") }

// TestDeferNoReturn only builds (does not run) a function that defers
// and then never returns.
func TestDeferNoReturn(t *testing.T) { buildTest(t, "deferNoReturn_ssa.go") }

// TestClosure tests closure related behavior.
func TestClosure(t *testing.T) { runTest(t, "closure_ssa.go") }

// TestArray tests array operations.
func TestArray(t *testing.T) { runTest(t, "array_ssa.go") }

// TestAppend tests append operations.
func TestAppend(t *testing.T) { runTest(t, "append_ssa.go") }

// TestZero tests zeroing of values.
func TestZero(t *testing.T) { runTest(t, "zero_ssa.go") }

// TestAddressed tests values whose addresses are taken.
func TestAddressed(t *testing.T) { runTest(t, "addressed_ssa.go") }

// TestCopy tests the copy builtin.
func TestCopy(t *testing.T) { runTest(t, "copy_ssa.go") }

// TestUnsafe tests unsafe pointer operations.
func TestUnsafe(t *testing.T) { runTest(t, "unsafe_ssa.go") }

// TestPhi tests phi-node placement.
func TestPhi(t *testing.T) { runTest(t, "phi_ssa.go") }

View File

@ -149,7 +149,7 @@ type Param struct {
// Func holds Node fields used only with function-like nodes.
type Func struct {
Shortname *Node
Enter Nodes
Enter Nodes // for example, allocate and initialize memory for escaping parameters
Exit Nodes
Cvars Nodes // closure params
Dcl []*Node // autodcl for this func/closure

View File

@ -0,0 +1,216 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "fmt"
var output string
// mypanic dumps the accumulated test output and then panics with s.
// fmt.Print (not Printf) is used so that a '%' in the accumulated
// output cannot be misread as a format verb — go vet flags Printf
// called with a non-constant format string and no arguments.
func mypanic(s string) {
	fmt.Print(output)
	panic(s)
}
// assertEqual aborts the test program via mypanic when x and y differ.
func assertEqual(x, y int) {
	if x == y {
		return
	}
	mypanic("assertEqual failed")
}
// main exercises functions whose locals, parameters, and results have
// their addresses taken, checking that the pointed-to values survive
// across calls that "gratuitously use some stack".
func main() {
	x := f1_ssa(2, 3)
	output += fmt.Sprintln("*x is", *x)
	output += fmt.Sprintln("Gratuitously use some stack")
	output += fmt.Sprintln("*x is", *x)
	assertEqual(*x, 9) // 2*3 + 3
	w := f3a_ssa(6)
	output += fmt.Sprintln("*w is", *w)
	output += fmt.Sprintln("Gratuitously use some stack")
	output += fmt.Sprintln("*w is", *w)
	assertEqual(*w, 6)
	y := f3b_ssa(12)
	output += fmt.Sprintln("*y.(*int) is", *y.(*int))
	output += fmt.Sprintln("Gratuitously use some stack")
	output += fmt.Sprintln("*y.(*int) is", *y.(*int))
	assertEqual(*y.(*int), 12)
	z := f3c_ssa(8)
	output += fmt.Sprintln("*z.(*int) is", *z.(*int))
	output += fmt.Sprintln("Gratuitously use some stack")
	output += fmt.Sprintln("*z.(*int) is", *z.(*int))
	assertEqual(*z.(*int), 8)
	args()
	test_autos()
}
// The empty "switch {}" at the top of each function below appears to be
// a device to keep the function from being inlined; the //go:noinline
// comment as placed here is not in a position the compiler honors.
// NOTE(review): confirm against the harness' expectations.

// f1_ssa returns the address of a parameter after mutating it.
func f1_ssa(x, y int) *int {
	switch {
	} //go:noinline
	x = x*y + y
	return &x
}

// f3a_ssa returns the address of an unmodified parameter.
func f3a_ssa(x int) *int {
	switch {
	} //go:noinline
	return &x
}

// f3b_ssa returns a parameter's address boxed in an interface.
func f3b_ssa(x int) interface{} { // ./foo.go:15: internal error: f3b_ssa ~r1 (type interface {}) recorded as live on entry
	switch {
	} //go:noinline
	return &x
}

// f3c_ssa returns the address of a local copy of a parameter, boxed.
func f3c_ssa(y int) interface{} {
	switch {
	} //go:noinline
	x := y
	return &x
}
// V is a small struct used to exercise addressed values; the p field
// links values into chains and rings in the tests below.
type V struct {
	p    *V
	w, x int64
}
// args builds three V values with a and b pointing back at v, calls
// args_ssa on them, and checks the selected result (v.w==1 selects a.x).
func args() {
	v := V{p: nil, w: 1, x: 1}
	a := V{p: &v, w: 2, x: 2}
	b := V{p: &v, w: 0, x: 0}
	i := v.args_ssa(a, b)
	output += fmt.Sprintln("i=", i)
	assertEqual(int(i), 2)
}

// args_ssa selects one of v.x, a.x, or b.x based on v.w; for any other
// selector it writes through b.p (aliasing the caller's v) and fails.
func (v V) args_ssa(a, b V) int64 {
	switch {
	} //go:noinline
	if v.w == 0 {
		return v.x
	}
	if v.w == 1 {
		return a.x
	}
	if v.w == 2 {
		return b.x
	}
	b.p.p = &a // v.p in caller = &a
	return -1
}
// test_autos runs test once for each supported selector value
// of autos_ssa (11-13, 21-23, 31, 32).
func test_autos() {
	for _, which := range []int64{11, 12, 13, 21, 22, 23, 31, 32} {
		test(which)
	}
}
// test calls autos_ssa with selector which and verifies the documented
// invariants: v2.val() == which, v2.p.val() == v3.val(), and that the
// eight autos form a ring reachable through v3 (seven hops of .p).
func test(which int64) {
	output += fmt.Sprintln("test", which)
	v1 := V{w: 30, x: 3, p: nil}
	v2, v3 := v1.autos_ssa(which, 10, 1, 20, 2)
	if which != v2.val() {
		output += fmt.Sprintln("Expected which=", which, "got v2.val()=", v2.val())
		mypanic("Failure of expected V value")
	}
	if v2.p.val() != v3.val() {
		output += fmt.Sprintln("Expected v2.p.val()=", v2.p.val(), "got v3.val()=", v3.val())
		mypanic("Failure of expected V.p value")
	}
	if which != v3.p.p.p.p.p.p.p.val() {
		output += fmt.Sprintln("Expected which=", which, "got v3.p.p.p.p.p.p.p.val()=", v3.p.p.p.p.p.p.p.val())
		mypanic("Failure of expected V.p value")
	}
}
// val returns the sum of v's two payload fields.
func (v V) val() int64 {
	return v.x + v.w
}
// autos_ssa uses contents of v and parameters w1, w2, x1, x2
// to initialize a bunch of locals, all of which have their
// address taken to force heap allocation, and then based on
// the value of which a pair of those locals are copied in
// various ways to the two results y, and z, which are also
// addressed. Which is expected to be one of 11-13, 21-23, 31, 32,
// and y.val() should be equal to which and y.p.val() should
// be equal to z.val(). Also, x(.p)**8 == x; that is, the
// autos are all linked into a ring.
func (v V) autos_ssa(which, w1, x1, w2, x2 int64) (y, z V) {
	switch {
	} //go:noinline
	fill_ssa(v.w, v.x, &v, v.p) // gratuitous no-op to force addressing
	var a, b, c, d, e, f, g, h V
	// Each call sets its third argument and links it to the next local;
	// the final call closes the ring (h.p = &a).
	fill_ssa(w1, x1, &a, &b)
	fill_ssa(w1, x2, &b, &c)
	fill_ssa(w1, v.x, &c, &d)
	fill_ssa(w2, x1, &d, &e)
	fill_ssa(w2, x2, &e, &f)
	fill_ssa(w2, v.x, &f, &g)
	fill_ssa(v.w, x1, &g, &h)
	fill_ssa(v.w, x2, &h, &a)
	// Copy a pair of adjacent ring elements into the results, mixing
	// direct assignment, gets, and getsI so several copy paths are hit.
	switch which {
	case 11:
		y = a
		z.getsI(&b)
	case 12:
		y.gets(&b)
		z = c
	case 13:
		y.gets(&c)
		z = d
	case 21:
		y.getsI(&d)
		z.gets(&e)
	case 22:
		y = e
		z = f
	case 23:
		y.gets(&f)
		z.getsI(&g)
	case 31:
		y = g
		z.gets(&h)
	case 32:
		y.getsI(&h)
		z = a
	default:
		panic("")
	}
	return
}
// gets is an address-mentioning way of implementing
// structure assignment.
func (to *V) gets(from *V) {
	switch {
	} //go:noinline
	*to = *from
}

// getsI is an address-and-interface-mentioning way of
// implementing structure assignment.
func (to *V) getsI(from interface{}) {
	switch {
	} //go:noinline
	*to = *from.(*V)
}

// fill_ssa initializes r with V{w:w, x:x, p:p}
func fill_ssa(w, x int64, r, p *V) {
	switch {
	} //go:noinline
	*r = V{w: w, x: x, p: p}
}

View File

@ -0,0 +1,70 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// append_ssa.go tests append operations.
package main
import "fmt"
var failed = false
// appendOne_ssa appends a single element; kept out of line so the
// one-argument append code path is compiled as a real call.
//go:noinline
func appendOne_ssa(a []int, x int) []int {
	return append(a, x)
}

// appendThree_ssa appends three elements; kept out of line so the
// multi-argument append code path is compiled as a real call.
//go:noinline
func appendThree_ssa(a []int, x, y, z int) []int {
	return append(a, x, y, z)
}
// eq reports whether a and b have the same length and the same
// elements in the same order.
func eq(a, b []int) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}
// expect records a test failure (and prints a diagnostic) when got
// does not equal want.
func expect(got, want []int) {
	if !eq(got, want) {
		fmt.Printf("expected %v, got %v\n", want, got)
		failed = true
	}
}
// testAppend grows a slice within (and then past) a fixed backing
// array, checking both the appended contents and whether the backing
// array was (or was not) reallocated at each step.
func testAppend() {
	var store [7]int
	a := store[:0]
	a = appendOne_ssa(a, 1)
	expect(a, []int{1})
	a = appendThree_ssa(a, 2, 3, 4)
	expect(a, []int{1, 2, 3, 4})
	a = appendThree_ssa(a, 5, 6, 7)
	expect(a, []int{1, 2, 3, 4, 5, 6, 7})
	// Capacity 7 was never exceeded, so a must still alias store.
	if &a[0] != &store[0] {
		fmt.Println("unnecessary grow")
		failed = true
	}
	a = appendOne_ssa(a, 8)
	expect(a, []int{1, 2, 3, 4, 5, 6, 7, 8})
	// The eighth element exceeds capacity, so append must reallocate.
	if &a[0] == &store[0] {
		fmt.Println("didn't grow")
		failed = true
	}
}
// main runs the append tests and panics if any check failed.
func main() {
	testAppend()
	if failed {
		panic("failed")
	}
}

View File

@ -0,0 +1,735 @@
package main
import "fmt"
// utd64 is an unsigned 64-bit arithmetic test vector: operands a and b
// plus the expected add/sub/mul/div/mod results. Entries built with
// b == 0 leave div and mod at zero; drivers must skip those operations.
type utd64 struct {
	a, b                    uint64
	add, sub, mul, div, mod uint64
}

// itd64 is the signed 64-bit counterpart of utd64.
type itd64 struct {
	a, b                    int64
	add, sub, mul, div, mod int64
}

// utd32 is the unsigned 32-bit test vector.
type utd32 struct {
	a, b                    uint32
	add, sub, mul, div, mod uint32
}

// itd32 is the signed 32-bit test vector.
type itd32 struct {
	a, b                    int32
	add, sub, mul, div, mod int32
}

// utd16 is the unsigned 16-bit test vector.
type utd16 struct {
	a, b                    uint16
	add, sub, mul, div, mod uint16
}

// itd16 is the signed 16-bit test vector.
type itd16 struct {
	a, b                    int16
	add, sub, mul, div, mod int16
}

// utd8 is the unsigned 8-bit test vector.
type utd8 struct {
	a, b                    uint8
	add, sub, mul, div, mod uint8
}

// itd8 is the signed 8-bit test vector.
type itd8 struct {
	a, b                    int8
	add, sub, mul, div, mod int8
}
// Unsigned 64-bit operations. Each operation is its own //go:noinline
// function so the operator itself is compiled (not constant-folded or
// inlined away) and both backends produce a comparable real call.
//go:noinline
func add_uint64_ssa(a, b uint64) uint64 {
	return a + b
}

//go:noinline
func sub_uint64_ssa(a, b uint64) uint64 {
	return a - b
}

//go:noinline
func div_uint64_ssa(a, b uint64) uint64 {
	return a / b
}

//go:noinline
func mod_uint64_ssa(a, b uint64) uint64 {
	return a % b
}

//go:noinline
func mul_uint64_ssa(a, b uint64) uint64 {
	return a * b
}

// Signed 64-bit operations.
//go:noinline
func add_int64_ssa(a, b int64) int64 {
	return a + b
}

//go:noinline
func sub_int64_ssa(a, b int64) int64 {
	return a - b
}

//go:noinline
func div_int64_ssa(a, b int64) int64 {
	return a / b
}

//go:noinline
func mod_int64_ssa(a, b int64) int64 {
	return a % b
}

//go:noinline
func mul_int64_ssa(a, b int64) int64 {
	return a * b
}

// Unsigned 32-bit operations.
//go:noinline
func add_uint32_ssa(a, b uint32) uint32 {
	return a + b
}

//go:noinline
func sub_uint32_ssa(a, b uint32) uint32 {
	return a - b
}

//go:noinline
func div_uint32_ssa(a, b uint32) uint32 {
	return a / b
}

//go:noinline
func mod_uint32_ssa(a, b uint32) uint32 {
	return a % b
}

//go:noinline
func mul_uint32_ssa(a, b uint32) uint32 {
	return a * b
}

// Signed 32-bit operations.
//go:noinline
func add_int32_ssa(a, b int32) int32 {
	return a + b
}

//go:noinline
func sub_int32_ssa(a, b int32) int32 {
	return a - b
}

//go:noinline
func div_int32_ssa(a, b int32) int32 {
	return a / b
}

//go:noinline
func mod_int32_ssa(a, b int32) int32 {
	return a % b
}

//go:noinline
func mul_int32_ssa(a, b int32) int32 {
	return a * b
}

// Unsigned 16-bit operations.
//go:noinline
func add_uint16_ssa(a, b uint16) uint16 {
	return a + b
}

//go:noinline
func sub_uint16_ssa(a, b uint16) uint16 {
	return a - b
}

//go:noinline
func div_uint16_ssa(a, b uint16) uint16 {
	return a / b
}

//go:noinline
func mod_uint16_ssa(a, b uint16) uint16 {
	return a % b
}

//go:noinline
func mul_uint16_ssa(a, b uint16) uint16 {
	return a * b
}

// Signed 16-bit operations.
//go:noinline
func add_int16_ssa(a, b int16) int16 {
	return a + b
}

//go:noinline
func sub_int16_ssa(a, b int16) int16 {
	return a - b
}

//go:noinline
func div_int16_ssa(a, b int16) int16 {
	return a / b
}

//go:noinline
func mod_int16_ssa(a, b int16) int16 {
	return a % b
}

//go:noinline
func mul_int16_ssa(a, b int16) int16 {
	return a * b
}

// Unsigned 8-bit operations.
//go:noinline
func add_uint8_ssa(a, b uint8) uint8 {
	return a + b
}

//go:noinline
func sub_uint8_ssa(a, b uint8) uint8 {
	return a - b
}

//go:noinline
func div_uint8_ssa(a, b uint8) uint8 {
	return a / b
}

//go:noinline
func mod_uint8_ssa(a, b uint8) uint8 {
	return a % b
}

//go:noinline
func mul_uint8_ssa(a, b uint8) uint8 {
	return a * b
}

// Signed 8-bit operations.
//go:noinline
func add_int8_ssa(a, b int8) int8 {
	return a + b
}

//go:noinline
func sub_int8_ssa(a, b int8) int8 {
	return a - b
}

//go:noinline
func div_int8_ssa(a, b int8) int8 {
	return a / b
}

//go:noinline
func mod_int8_ssa(a, b int8) int8 {
	return a % b
}

//go:noinline
func mul_int8_ssa(a, b int8) int8 {
	return a * b
}
// uint64_data lists expected results of the uint64 operations over the
// boundary operands 0, 1, 2^32, and 2^64-1. Entries whose b is zero
// leave div and mod at their zero values; the driver must not divide
// by them. (The redundant inner utd64 and the explicit type annotation
// are elided, as gofmt -s would.)
var uint64_data = []utd64{
	{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	{a: 0, b: 1, add: 1, sub: 18446744073709551615, mul: 0, div: 0, mod: 0},
	{a: 0, b: 4294967296, add: 4294967296, sub: 18446744069414584320, mul: 0, div: 0, mod: 0},
	{a: 0, b: 18446744073709551615, add: 18446744073709551615, sub: 1, mul: 0, div: 0, mod: 0},
	{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	{a: 1, b: 4294967296, add: 4294967297, sub: 18446744069414584321, mul: 4294967296, div: 0, mod: 1},
	{a: 1, b: 18446744073709551615, add: 0, sub: 2, mul: 18446744073709551615, div: 0, mod: 1},
	{a: 4294967296, b: 0, add: 4294967296, sub: 4294967296, mul: 0},
	{a: 4294967296, b: 1, add: 4294967297, sub: 4294967295, mul: 4294967296, div: 4294967296, mod: 0},
	{a: 4294967296, b: 4294967296, add: 8589934592, sub: 0, mul: 0, div: 1, mod: 0},
	{a: 4294967296, b: 18446744073709551615, add: 4294967295, sub: 4294967297, mul: 18446744069414584320, div: 0, mod: 4294967296},
	{a: 18446744073709551615, b: 0, add: 18446744073709551615, sub: 18446744073709551615, mul: 0},
	{a: 18446744073709551615, b: 1, add: 0, sub: 18446744073709551614, mul: 18446744073709551615, div: 18446744073709551615, mod: 0},
	{a: 18446744073709551615, b: 4294967296, add: 4294967295, sub: 18446744069414584319, mul: 18446744069414584320, div: 4294967295, mod: 4294967295},
	{a: 18446744073709551615, b: 18446744073709551615, add: 18446744073709551614, sub: 0, mul: 1, div: 1, mod: 0},
}
// int64_data tabulates expected results of +, -, *, / and % on int64 operand
// pairs drawn from the boundary set {MinInt64, MinInt64+1, -2^32, -1, 0, 1,
// 2^32, MaxInt64-1, MaxInt64}. Values wrap mod 2^64; rows with b == 0 omit
// div/mod, which main skips.
var int64_data []itd64 = []itd64{itd64{a: -9223372036854775808, b: -9223372036854775808, add: 0, sub: 0, mul: 0, div: 1, mod: 0},
	itd64{a: -9223372036854775808, b: -9223372036854775807, add: 1, sub: -1, mul: -9223372036854775808, div: 1, mod: -1},
	itd64{a: -9223372036854775808, b: -4294967296, add: 9223372032559808512, sub: -9223372032559808512, mul: 0, div: 2147483648, mod: 0},
	itd64{a: -9223372036854775808, b: -1, add: 9223372036854775807, sub: -9223372036854775807, mul: -9223372036854775808, div: -9223372036854775808, mod: 0},
	itd64{a: -9223372036854775808, b: 0, add: -9223372036854775808, sub: -9223372036854775808, mul: 0},
	itd64{a: -9223372036854775808, b: 1, add: -9223372036854775807, sub: 9223372036854775807, mul: -9223372036854775808, div: -9223372036854775808, mod: 0},
	itd64{a: -9223372036854775808, b: 4294967296, add: -9223372032559808512, sub: 9223372032559808512, mul: 0, div: -2147483648, mod: 0},
	itd64{a: -9223372036854775808, b: 9223372036854775806, add: -2, sub: 2, mul: 0, div: -1, mod: -2},
	itd64{a: -9223372036854775808, b: 9223372036854775807, add: -1, sub: 1, mul: -9223372036854775808, div: -1, mod: -1},
	itd64{a: -9223372036854775807, b: -9223372036854775808, add: 1, sub: 1, mul: -9223372036854775808, div: 0, mod: -9223372036854775807},
	itd64{a: -9223372036854775807, b: -9223372036854775807, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd64{a: -9223372036854775807, b: -4294967296, add: 9223372032559808513, sub: -9223372032559808511, mul: -4294967296, div: 2147483647, mod: -4294967295},
	itd64{a: -9223372036854775807, b: -1, add: -9223372036854775808, sub: -9223372036854775806, mul: 9223372036854775807, div: 9223372036854775807, mod: 0},
	itd64{a: -9223372036854775807, b: 0, add: -9223372036854775807, sub: -9223372036854775807, mul: 0},
	itd64{a: -9223372036854775807, b: 1, add: -9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: -9223372036854775807, mod: 0},
	itd64{a: -9223372036854775807, b: 4294967296, add: -9223372032559808511, sub: 9223372032559808513, mul: 4294967296, div: -2147483647, mod: -4294967295},
	itd64{a: -9223372036854775807, b: 9223372036854775806, add: -1, sub: 3, mul: 9223372036854775806, div: -1, mod: -1},
	itd64{a: -9223372036854775807, b: 9223372036854775807, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd64{a: -4294967296, b: -9223372036854775808, add: 9223372032559808512, sub: 9223372032559808512, mul: 0, div: 0, mod: -4294967296},
	itd64{a: -4294967296, b: -9223372036854775807, add: 9223372032559808513, sub: 9223372032559808511, mul: -4294967296, div: 0, mod: -4294967296},
	itd64{a: -4294967296, b: -4294967296, add: -8589934592, sub: 0, mul: 0, div: 1, mod: 0},
	itd64{a: -4294967296, b: -1, add: -4294967297, sub: -4294967295, mul: 4294967296, div: 4294967296, mod: 0},
	itd64{a: -4294967296, b: 0, add: -4294967296, sub: -4294967296, mul: 0},
	itd64{a: -4294967296, b: 1, add: -4294967295, sub: -4294967297, mul: -4294967296, div: -4294967296, mod: 0},
	itd64{a: -4294967296, b: 4294967296, add: 0, sub: -8589934592, mul: 0, div: -1, mod: 0},
	itd64{a: -4294967296, b: 9223372036854775806, add: 9223372032559808510, sub: 9223372032559808514, mul: 8589934592, div: 0, mod: -4294967296},
	itd64{a: -4294967296, b: 9223372036854775807, add: 9223372032559808511, sub: 9223372032559808513, mul: 4294967296, div: 0, mod: -4294967296},
	itd64{a: -1, b: -9223372036854775808, add: 9223372036854775807, sub: 9223372036854775807, mul: -9223372036854775808, div: 0, mod: -1},
	itd64{a: -1, b: -9223372036854775807, add: -9223372036854775808, sub: 9223372036854775806, mul: 9223372036854775807, div: 0, mod: -1},
	itd64{a: -1, b: -4294967296, add: -4294967297, sub: 4294967295, mul: 4294967296, div: 0, mod: -1},
	itd64{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
	itd64{a: -1, b: 0, add: -1, sub: -1, mul: 0},
	itd64{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd64{a: -1, b: 4294967296, add: 4294967295, sub: -4294967297, mul: -4294967296, div: 0, mod: -1},
	itd64{a: -1, b: 9223372036854775806, add: 9223372036854775805, sub: -9223372036854775807, mul: -9223372036854775806, div: 0, mod: -1},
	itd64{a: -1, b: 9223372036854775807, add: 9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: 0, mod: -1},
	itd64{a: 0, b: -9223372036854775808, add: -9223372036854775808, sub: -9223372036854775808, mul: 0, div: 0, mod: 0},
	itd64{a: 0, b: -9223372036854775807, add: -9223372036854775807, sub: 9223372036854775807, mul: 0, div: 0, mod: 0},
	itd64{a: 0, b: -4294967296, add: -4294967296, sub: 4294967296, mul: 0, div: 0, mod: 0},
	itd64{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0, mod: 0},
	itd64{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	itd64{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0, mod: 0},
	itd64{a: 0, b: 4294967296, add: 4294967296, sub: -4294967296, mul: 0, div: 0, mod: 0},
	itd64{a: 0, b: 9223372036854775806, add: 9223372036854775806, sub: -9223372036854775806, mul: 0, div: 0, mod: 0},
	itd64{a: 0, b: 9223372036854775807, add: 9223372036854775807, sub: -9223372036854775807, mul: 0, div: 0, mod: 0},
	itd64{a: 1, b: -9223372036854775808, add: -9223372036854775807, sub: -9223372036854775807, mul: -9223372036854775808, div: 0, mod: 1},
	itd64{a: 1, b: -9223372036854775807, add: -9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: 0, mod: 1},
	itd64{a: 1, b: -4294967296, add: -4294967295, sub: 4294967297, mul: -4294967296, div: 0, mod: 1},
	itd64{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd64{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	itd64{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd64{a: 1, b: 4294967296, add: 4294967297, sub: -4294967295, mul: 4294967296, div: 0, mod: 1},
	itd64{a: 1, b: 9223372036854775806, add: 9223372036854775807, sub: -9223372036854775805, mul: 9223372036854775806, div: 0, mod: 1},
	itd64{a: 1, b: 9223372036854775807, add: -9223372036854775808, sub: -9223372036854775806, mul: 9223372036854775807, div: 0, mod: 1},
	itd64{a: 4294967296, b: -9223372036854775808, add: -9223372032559808512, sub: -9223372032559808512, mul: 0, div: 0, mod: 4294967296},
	itd64{a: 4294967296, b: -9223372036854775807, add: -9223372032559808511, sub: -9223372032559808513, mul: 4294967296, div: 0, mod: 4294967296},
	itd64{a: 4294967296, b: -4294967296, add: 0, sub: 8589934592, mul: 0, div: -1, mod: 0},
	itd64{a: 4294967296, b: -1, add: 4294967295, sub: 4294967297, mul: -4294967296, div: -4294967296, mod: 0},
	itd64{a: 4294967296, b: 0, add: 4294967296, sub: 4294967296, mul: 0},
	itd64{a: 4294967296, b: 1, add: 4294967297, sub: 4294967295, mul: 4294967296, div: 4294967296, mod: 0},
	itd64{a: 4294967296, b: 4294967296, add: 8589934592, sub: 0, mul: 0, div: 1, mod: 0},
	itd64{a: 4294967296, b: 9223372036854775806, add: -9223372032559808514, sub: -9223372032559808510, mul: -8589934592, div: 0, mod: 4294967296},
	itd64{a: 4294967296, b: 9223372036854775807, add: -9223372032559808513, sub: -9223372032559808511, mul: -4294967296, div: 0, mod: 4294967296},
	itd64{a: 9223372036854775806, b: -9223372036854775808, add: -2, sub: -2, mul: 0, div: 0, mod: 9223372036854775806},
	itd64{a: 9223372036854775806, b: -9223372036854775807, add: -1, sub: -3, mul: 9223372036854775806, div: 0, mod: 9223372036854775806},
	itd64{a: 9223372036854775806, b: -4294967296, add: 9223372032559808510, sub: -9223372032559808514, mul: 8589934592, div: -2147483647, mod: 4294967294},
	itd64{a: 9223372036854775806, b: -1, add: 9223372036854775805, sub: 9223372036854775807, mul: -9223372036854775806, div: -9223372036854775806, mod: 0},
	itd64{a: 9223372036854775806, b: 0, add: 9223372036854775806, sub: 9223372036854775806, mul: 0},
	itd64{a: 9223372036854775806, b: 1, add: 9223372036854775807, sub: 9223372036854775805, mul: 9223372036854775806, div: 9223372036854775806, mod: 0},
	itd64{a: 9223372036854775806, b: 4294967296, add: -9223372032559808514, sub: 9223372032559808510, mul: -8589934592, div: 2147483647, mod: 4294967294},
	itd64{a: 9223372036854775806, b: 9223372036854775806, add: -4, sub: 0, mul: 4, div: 1, mod: 0},
	itd64{a: 9223372036854775806, b: 9223372036854775807, add: -3, sub: -1, mul: -9223372036854775806, div: 0, mod: 9223372036854775806},
	itd64{a: 9223372036854775807, b: -9223372036854775808, add: -1, sub: -1, mul: -9223372036854775808, div: 0, mod: 9223372036854775807},
	itd64{a: 9223372036854775807, b: -9223372036854775807, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd64{a: 9223372036854775807, b: -4294967296, add: 9223372032559808511, sub: -9223372032559808513, mul: 4294967296, div: -2147483647, mod: 4294967295},
	itd64{a: 9223372036854775807, b: -1, add: 9223372036854775806, sub: -9223372036854775808, mul: -9223372036854775807, div: -9223372036854775807, mod: 0},
	itd64{a: 9223372036854775807, b: 0, add: 9223372036854775807, sub: 9223372036854775807, mul: 0},
	itd64{a: 9223372036854775807, b: 1, add: -9223372036854775808, sub: 9223372036854775806, mul: 9223372036854775807, div: 9223372036854775807, mod: 0},
	itd64{a: 9223372036854775807, b: 4294967296, add: -9223372032559808513, sub: 9223372032559808511, mul: -4294967296, div: 2147483647, mod: 4294967295},
	itd64{a: 9223372036854775807, b: 9223372036854775806, add: -3, sub: 1, mul: -9223372036854775806, div: 1, mod: 1},
	itd64{a: 9223372036854775807, b: 9223372036854775807, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
}
// uint32_data tabulates expected results of the binary operators on uint32
// operand pairs drawn from {0, 1, 2^32-1}. Values wrap mod 2^32; rows with
// b == 0 omit div/mod, which main skips.
var uint32_data []utd32 = []utd32{utd32{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	utd32{a: 0, b: 1, add: 1, sub: 4294967295, mul: 0, div: 0, mod: 0},
	utd32{a: 0, b: 4294967295, add: 4294967295, sub: 1, mul: 0, div: 0, mod: 0},
	utd32{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	utd32{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	utd32{a: 1, b: 4294967295, add: 0, sub: 2, mul: 4294967295, div: 0, mod: 1},
	utd32{a: 4294967295, b: 0, add: 4294967295, sub: 4294967295, mul: 0},
	utd32{a: 4294967295, b: 1, add: 0, sub: 4294967294, mul: 4294967295, div: 4294967295, mod: 0},
	utd32{a: 4294967295, b: 4294967295, add: 4294967294, sub: 0, mul: 1, div: 1, mod: 0},
}
// int32_data tabulates expected results of the binary operators on int32
// operand pairs drawn from {MinInt32, MinInt32+1, -1, 0, 1, MaxInt32}.
// Values wrap mod 2^32; rows with b == 0 omit div/mod, which main skips.
var int32_data []itd32 = []itd32{itd32{a: -2147483648, b: -2147483648, add: 0, sub: 0, mul: 0, div: 1, mod: 0},
	itd32{a: -2147483648, b: -2147483647, add: 1, sub: -1, mul: -2147483648, div: 1, mod: -1},
	itd32{a: -2147483648, b: -1, add: 2147483647, sub: -2147483647, mul: -2147483648, div: -2147483648, mod: 0},
	itd32{a: -2147483648, b: 0, add: -2147483648, sub: -2147483648, mul: 0},
	itd32{a: -2147483648, b: 1, add: -2147483647, sub: 2147483647, mul: -2147483648, div: -2147483648, mod: 0},
	itd32{a: -2147483648, b: 2147483647, add: -1, sub: 1, mul: -2147483648, div: -1, mod: -1},
	itd32{a: -2147483647, b: -2147483648, add: 1, sub: 1, mul: -2147483648, div: 0, mod: -2147483647},
	itd32{a: -2147483647, b: -2147483647, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd32{a: -2147483647, b: -1, add: -2147483648, sub: -2147483646, mul: 2147483647, div: 2147483647, mod: 0},
	itd32{a: -2147483647, b: 0, add: -2147483647, sub: -2147483647, mul: 0},
	itd32{a: -2147483647, b: 1, add: -2147483646, sub: -2147483648, mul: -2147483647, div: -2147483647, mod: 0},
	itd32{a: -2147483647, b: 2147483647, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd32{a: -1, b: -2147483648, add: 2147483647, sub: 2147483647, mul: -2147483648, div: 0, mod: -1},
	itd32{a: -1, b: -2147483647, add: -2147483648, sub: 2147483646, mul: 2147483647, div: 0, mod: -1},
	itd32{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
	itd32{a: -1, b: 0, add: -1, sub: -1, mul: 0},
	itd32{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd32{a: -1, b: 2147483647, add: 2147483646, sub: -2147483648, mul: -2147483647, div: 0, mod: -1},
	itd32{a: 0, b: -2147483648, add: -2147483648, sub: -2147483648, mul: 0, div: 0, mod: 0},
	itd32{a: 0, b: -2147483647, add: -2147483647, sub: 2147483647, mul: 0, div: 0, mod: 0},
	itd32{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0, mod: 0},
	itd32{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	itd32{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0, mod: 0},
	itd32{a: 0, b: 2147483647, add: 2147483647, sub: -2147483647, mul: 0, div: 0, mod: 0},
	itd32{a: 1, b: -2147483648, add: -2147483647, sub: -2147483647, mul: -2147483648, div: 0, mod: 1},
	itd32{a: 1, b: -2147483647, add: -2147483646, sub: -2147483648, mul: -2147483647, div: 0, mod: 1},
	itd32{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd32{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	itd32{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd32{a: 1, b: 2147483647, add: -2147483648, sub: -2147483646, mul: 2147483647, div: 0, mod: 1},
	itd32{a: 2147483647, b: -2147483648, add: -1, sub: -1, mul: -2147483648, div: 0, mod: 2147483647},
	itd32{a: 2147483647, b: -2147483647, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd32{a: 2147483647, b: -1, add: 2147483646, sub: -2147483648, mul: -2147483647, div: -2147483647, mod: 0},
	itd32{a: 2147483647, b: 0, add: 2147483647, sub: 2147483647, mul: 0},
	itd32{a: 2147483647, b: 1, add: -2147483648, sub: 2147483646, mul: 2147483647, div: 2147483647, mod: 0},
	itd32{a: 2147483647, b: 2147483647, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
}
// uint16_data tabulates expected results of the binary operators on uint16
// operand pairs drawn from {0, 1, 2^16-1}. Values wrap mod 2^16; rows with
// b == 0 omit div/mod, which main skips.
var uint16_data []utd16 = []utd16{utd16{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	utd16{a: 0, b: 1, add: 1, sub: 65535, mul: 0, div: 0, mod: 0},
	utd16{a: 0, b: 65535, add: 65535, sub: 1, mul: 0, div: 0, mod: 0},
	utd16{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	utd16{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	utd16{a: 1, b: 65535, add: 0, sub: 2, mul: 65535, div: 0, mod: 1},
	utd16{a: 65535, b: 0, add: 65535, sub: 65535, mul: 0},
	utd16{a: 65535, b: 1, add: 0, sub: 65534, mul: 65535, div: 65535, mod: 0},
	utd16{a: 65535, b: 65535, add: 65534, sub: 0, mul: 1, div: 1, mod: 0},
}
// int16_data tabulates expected results of the binary operators on int16
// operand pairs drawn from {MinInt16, MinInt16+1, -1, 0, 1, MaxInt16-1,
// MaxInt16}. Values wrap mod 2^16; rows with b == 0 omit div/mod.
var int16_data []itd16 = []itd16{itd16{a: -32768, b: -32768, add: 0, sub: 0, mul: 0, div: 1, mod: 0},
	itd16{a: -32768, b: -32767, add: 1, sub: -1, mul: -32768, div: 1, mod: -1},
	itd16{a: -32768, b: -1, add: 32767, sub: -32767, mul: -32768, div: -32768, mod: 0},
	itd16{a: -32768, b: 0, add: -32768, sub: -32768, mul: 0},
	itd16{a: -32768, b: 1, add: -32767, sub: 32767, mul: -32768, div: -32768, mod: 0},
	itd16{a: -32768, b: 32766, add: -2, sub: 2, mul: 0, div: -1, mod: -2},
	itd16{a: -32768, b: 32767, add: -1, sub: 1, mul: -32768, div: -1, mod: -1},
	itd16{a: -32767, b: -32768, add: 1, sub: 1, mul: -32768, div: 0, mod: -32767},
	itd16{a: -32767, b: -32767, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd16{a: -32767, b: -1, add: -32768, sub: -32766, mul: 32767, div: 32767, mod: 0},
	itd16{a: -32767, b: 0, add: -32767, sub: -32767, mul: 0},
	itd16{a: -32767, b: 1, add: -32766, sub: -32768, mul: -32767, div: -32767, mod: 0},
	itd16{a: -32767, b: 32766, add: -1, sub: 3, mul: 32766, div: -1, mod: -1},
	itd16{a: -32767, b: 32767, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd16{a: -1, b: -32768, add: 32767, sub: 32767, mul: -32768, div: 0, mod: -1},
	itd16{a: -1, b: -32767, add: -32768, sub: 32766, mul: 32767, div: 0, mod: -1},
	itd16{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
	itd16{a: -1, b: 0, add: -1, sub: -1, mul: 0},
	itd16{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd16{a: -1, b: 32766, add: 32765, sub: -32767, mul: -32766, div: 0, mod: -1},
	itd16{a: -1, b: 32767, add: 32766, sub: -32768, mul: -32767, div: 0, mod: -1},
	itd16{a: 0, b: -32768, add: -32768, sub: -32768, mul: 0, div: 0, mod: 0},
	itd16{a: 0, b: -32767, add: -32767, sub: 32767, mul: 0, div: 0, mod: 0},
	itd16{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0, mod: 0},
	itd16{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	itd16{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0, mod: 0},
	itd16{a: 0, b: 32766, add: 32766, sub: -32766, mul: 0, div: 0, mod: 0},
	itd16{a: 0, b: 32767, add: 32767, sub: -32767, mul: 0, div: 0, mod: 0},
	itd16{a: 1, b: -32768, add: -32767, sub: -32767, mul: -32768, div: 0, mod: 1},
	itd16{a: 1, b: -32767, add: -32766, sub: -32768, mul: -32767, div: 0, mod: 1},
	itd16{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd16{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	itd16{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd16{a: 1, b: 32766, add: 32767, sub: -32765, mul: 32766, div: 0, mod: 1},
	itd16{a: 1, b: 32767, add: -32768, sub: -32766, mul: 32767, div: 0, mod: 1},
	itd16{a: 32766, b: -32768, add: -2, sub: -2, mul: 0, div: 0, mod: 32766},
	itd16{a: 32766, b: -32767, add: -1, sub: -3, mul: 32766, div: 0, mod: 32766},
	itd16{a: 32766, b: -1, add: 32765, sub: 32767, mul: -32766, div: -32766, mod: 0},
	itd16{a: 32766, b: 0, add: 32766, sub: 32766, mul: 0},
	itd16{a: 32766, b: 1, add: 32767, sub: 32765, mul: 32766, div: 32766, mod: 0},
	itd16{a: 32766, b: 32766, add: -4, sub: 0, mul: 4, div: 1, mod: 0},
	itd16{a: 32766, b: 32767, add: -3, sub: -1, mul: -32766, div: 0, mod: 32766},
	itd16{a: 32767, b: -32768, add: -1, sub: -1, mul: -32768, div: 0, mod: 32767},
	itd16{a: 32767, b: -32767, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd16{a: 32767, b: -1, add: 32766, sub: -32768, mul: -32767, div: -32767, mod: 0},
	itd16{a: 32767, b: 0, add: 32767, sub: 32767, mul: 0},
	itd16{a: 32767, b: 1, add: -32768, sub: 32766, mul: 32767, div: 32767, mod: 0},
	itd16{a: 32767, b: 32766, add: -3, sub: 1, mul: -32766, div: 1, mod: 1},
	itd16{a: 32767, b: 32767, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
}
// uint8_data tabulates expected results of the binary operators on uint8
// operand pairs drawn from {0, 1, 255}. Values wrap mod 256; rows with
// b == 0 omit div/mod, which main skips.
var uint8_data []utd8 = []utd8{utd8{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	utd8{a: 0, b: 1, add: 1, sub: 255, mul: 0, div: 0, mod: 0},
	utd8{a: 0, b: 255, add: 255, sub: 1, mul: 0, div: 0, mod: 0},
	utd8{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	utd8{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	utd8{a: 1, b: 255, add: 0, sub: 2, mul: 255, div: 0, mod: 1},
	utd8{a: 255, b: 0, add: 255, sub: 255, mul: 0},
	utd8{a: 255, b: 1, add: 0, sub: 254, mul: 255, div: 255, mod: 0},
	utd8{a: 255, b: 255, add: 254, sub: 0, mul: 1, div: 1, mod: 0},
}
// int8_data tabulates expected results of the binary operators on int8
// operand pairs drawn from {-128, -127, -1, 0, 1, 126, 127}. Values wrap
// mod 256; rows with b == 0 omit div/mod, which main skips.
var int8_data []itd8 = []itd8{itd8{a: -128, b: -128, add: 0, sub: 0, mul: 0, div: 1, mod: 0},
	itd8{a: -128, b: -127, add: 1, sub: -1, mul: -128, div: 1, mod: -1},
	itd8{a: -128, b: -1, add: 127, sub: -127, mul: -128, div: -128, mod: 0},
	itd8{a: -128, b: 0, add: -128, sub: -128, mul: 0},
	itd8{a: -128, b: 1, add: -127, sub: 127, mul: -128, div: -128, mod: 0},
	itd8{a: -128, b: 126, add: -2, sub: 2, mul: 0, div: -1, mod: -2},
	itd8{a: -128, b: 127, add: -1, sub: 1, mul: -128, div: -1, mod: -1},
	itd8{a: -127, b: -128, add: 1, sub: 1, mul: -128, div: 0, mod: -127},
	itd8{a: -127, b: -127, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd8{a: -127, b: -1, add: -128, sub: -126, mul: 127, div: 127, mod: 0},
	itd8{a: -127, b: 0, add: -127, sub: -127, mul: 0},
	itd8{a: -127, b: 1, add: -126, sub: -128, mul: -127, div: -127, mod: 0},
	itd8{a: -127, b: 126, add: -1, sub: 3, mul: 126, div: -1, mod: -1},
	itd8{a: -127, b: 127, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd8{a: -1, b: -128, add: 127, sub: 127, mul: -128, div: 0, mod: -1},
	itd8{a: -1, b: -127, add: -128, sub: 126, mul: 127, div: 0, mod: -1},
	itd8{a: -1, b: -1, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
	itd8{a: -1, b: 0, add: -1, sub: -1, mul: 0},
	itd8{a: -1, b: 1, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd8{a: -1, b: 126, add: 125, sub: -127, mul: -126, div: 0, mod: -1},
	itd8{a: -1, b: 127, add: 126, sub: -128, mul: -127, div: 0, mod: -1},
	itd8{a: 0, b: -128, add: -128, sub: -128, mul: 0, div: 0, mod: 0},
	itd8{a: 0, b: -127, add: -127, sub: 127, mul: 0, div: 0, mod: 0},
	itd8{a: 0, b: -1, add: -1, sub: 1, mul: 0, div: 0, mod: 0},
	itd8{a: 0, b: 0, add: 0, sub: 0, mul: 0},
	itd8{a: 0, b: 1, add: 1, sub: -1, mul: 0, div: 0, mod: 0},
	itd8{a: 0, b: 126, add: 126, sub: -126, mul: 0, div: 0, mod: 0},
	itd8{a: 0, b: 127, add: 127, sub: -127, mul: 0, div: 0, mod: 0},
	itd8{a: 1, b: -128, add: -127, sub: -127, mul: -128, div: 0, mod: 1},
	itd8{a: 1, b: -127, add: -126, sub: -128, mul: -127, div: 0, mod: 1},
	itd8{a: 1, b: -1, add: 0, sub: 2, mul: -1, div: -1, mod: 0},
	itd8{a: 1, b: 0, add: 1, sub: 1, mul: 0},
	itd8{a: 1, b: 1, add: 2, sub: 0, mul: 1, div: 1, mod: 0},
	itd8{a: 1, b: 126, add: 127, sub: -125, mul: 126, div: 0, mod: 1},
	itd8{a: 1, b: 127, add: -128, sub: -126, mul: 127, div: 0, mod: 1},
	itd8{a: 126, b: -128, add: -2, sub: -2, mul: 0, div: 0, mod: 126},
	itd8{a: 126, b: -127, add: -1, sub: -3, mul: 126, div: 0, mod: 126},
	itd8{a: 126, b: -1, add: 125, sub: 127, mul: -126, div: -126, mod: 0},
	itd8{a: 126, b: 0, add: 126, sub: 126, mul: 0},
	itd8{a: 126, b: 1, add: 127, sub: 125, mul: 126, div: 126, mod: 0},
	itd8{a: 126, b: 126, add: -4, sub: 0, mul: 4, div: 1, mod: 0},
	itd8{a: 126, b: 127, add: -3, sub: -1, mul: -126, div: 0, mod: 126},
	itd8{a: 127, b: -128, add: -1, sub: -1, mul: -128, div: 0, mod: 127},
	itd8{a: 127, b: -127, add: 0, sub: -2, mul: -1, div: -1, mod: 0},
	itd8{a: 127, b: -1, add: 126, sub: -128, mul: -127, div: -127, mod: 0},
	itd8{a: 127, b: 0, add: 127, sub: 127, mul: 0},
	itd8{a: 127, b: 1, add: -128, sub: 126, mul: 127, div: 127, mod: 0},
	itd8{a: 127, b: 126, add: -3, sub: 1, mul: -126, div: 1, mod: 1},
	itd8{a: 127, b: 127, add: -2, sub: 0, mul: 1, div: 1, mod: 0},
}
var failed bool
// main runs every arithmetic operator over every tabulated operand pair and
// compares against the precomputed results. Mismatches are printed and
// recorded in failed; the process panics at the end if anything failed.
// div/mod checks are skipped when the divisor is zero.
func main() {
	// 64-bit unsigned cases.
	for _, v := range uint64_data {
		if got := add_uint64_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_uint64 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_uint64_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_uint64 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_uint64_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_uint64 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_uint64_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_uint64 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_uint64_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_uint64 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	// 64-bit signed cases.
	for _, v := range int64_data {
		if got := add_int64_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_int64 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_int64_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_int64 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_int64_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_int64 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_int64_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_int64 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_int64_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_int64 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	// 32-bit unsigned cases.
	for _, v := range uint32_data {
		if got := add_uint32_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_uint32 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_uint32_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_uint32 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_uint32_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_uint32 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_uint32_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_uint32 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_uint32_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_uint32 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	// 32-bit signed cases.
	for _, v := range int32_data {
		if got := add_int32_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_int32 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_int32_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_int32 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_int32_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_int32 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_int32_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_int32 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_int32_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_int32 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	// 16-bit unsigned cases.
	for _, v := range uint16_data {
		if got := add_uint16_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_uint16 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_uint16_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_uint16 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_uint16_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_uint16 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_uint16_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_uint16 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_uint16_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_uint16 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	// 16-bit signed cases.
	for _, v := range int16_data {
		if got := add_int16_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_int16 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_int16_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_int16 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_int16_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_int16 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_int16_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_int16 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_int16_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_int16 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	// 8-bit unsigned cases.
	for _, v := range uint8_data {
		if got := add_uint8_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_uint8 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_uint8_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_uint8 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_uint8_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_uint8 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_uint8_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_uint8 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_uint8_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_uint8 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	// 8-bit signed cases.
	for _, v := range int8_data {
		if got := add_int8_ssa(v.a, v.b); got != v.add {
			fmt.Printf("add_int8 %d+%d = %d, wanted %d\n", v.a, v.b, got, v.add)
			failed = true
		}
		if got := sub_int8_ssa(v.a, v.b); got != v.sub {
			fmt.Printf("sub_int8 %d-%d = %d, wanted %d\n", v.a, v.b, got, v.sub)
			failed = true
		}
		if v.b != 0 {
			if got := div_int8_ssa(v.a, v.b); got != v.div {
				fmt.Printf("div_int8 %d/%d = %d, wanted %d\n", v.a, v.b, got, v.div)
				failed = true
			}
		}
		if v.b != 0 {
			if got := mod_int8_ssa(v.a, v.b); got != v.mod {
				fmt.Printf("mod_int8 %d%%%d = %d, wanted %d\n", v.a, v.b, got, v.mod)
				failed = true
			}
		}
		if got := mul_int8_ssa(v.a, v.b); got != v.mul {
			fmt.Printf("mul_int8 %d*%d = %d, wanted %d\n", v.a, v.b, got, v.mul)
			failed = true
		}
	}
	if failed {
		panic("tests failed")
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,438 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests arithmetic expressions
package main
import "fmt"
const (
	// y is a 28-bit constant; folding many copies of it in a single
	// expression produces constants wider than 32 bits (see the
	// invalid*_ssa functions below).
	y = 0x0fffFFFF
)
//go:noinline
func invalidAdd_ssa(x uint32) uint32 {
	// 17 additions of y: the folded constant exceeds 32 bits before being
	// truncated back to uint32 (see testLargeConst).
	return x + y + y + y + y + y + y + y + y + y + y + y + y + y + y + y + y + y
}
//go:noinline
func invalidSub_ssa(x uint32) uint32 {
	// 17 subtractions of y: the folded constant exceeds 32 bits before
	// being truncated back to uint32 (see testLargeConst).
	return x - y - y - y - y - y - y - y - y - y - y - y - y - y - y - y - y - y
}
//go:noinline
func invalidMul_ssa(x uint32) uint32 {
	// 17 multiplications by y: the folded constant exceeds 32 bits before
	// being truncated back to uint32 (see testLargeConst).
	return x * y * y * y * y * y * y * y * y * y * y * y * y * y * y * y * y * y
}
// testLargeConst tests a situation where larger than 32 bit consts were passed to ADDL
// causing an invalid instruction error.
func testLargeConst() {
	// Each case feeds 1 through one of the invalid*_ssa helpers and checks
	// the 32-bit-truncated constant-folded result.
	cases := []struct {
		op   string
		fn   func(uint32) uint32
		want uint32
	}{
		{"add", invalidAdd_ssa, 268435440},
		{"sub", invalidSub_ssa, 4026531858},
		{"mul", invalidMul_ssa, 268435455},
	}
	for _, c := range cases {
		if got := c.fn(1); got != c.want {
			println("testLargeConst "+c.op+" failed, wanted", c.want, "got", got)
			failed = true
		}
	}
}
// testArithRshConst ensures that "const >> const" right shifts correctly perform
// sign extension on the lhs constant.
func testArithRshConst() {
	// Unsigned: the top bit must shift in a zero.
	wantu := uint64(0x4000000000000000)
	if got := arithRshuConst_ssa(); got != wantu {
		println("arithRshuConst failed, wanted", wantu, "got", got)
		failed = true
	}
	// Signed: the sign bit must be replicated.
	wants := int64(-0x4000000000000000)
	if got := arithRshConst_ssa(); got != wants {
		// Fixed: this message previously said "arithRshuConst", misreporting
		// a failure of the signed variant as the unsigned one.
		println("arithRshConst failed, wanted", wants, "got", got)
		failed = true
	}
}
//go:noinline
func arithRshuConst_ssa() uint64 {
	// Both operands are compile-time-known locals: exercises constant
	// folding of an unsigned right shift (top bit shifts in zero).
	y := uint64(0x8000000000000001)
	z := uint64(1)
	return uint64(y >> z)
}
//go:noinline
func arithRshConst_ssa() int64 {
	// Both operands are compile-time-known locals: exercises constant
	// folding of a signed right shift (sign bit must be replicated).
	y := int64(-0x8000000000000000)
	z := uint64(1)
	return int64(y >> z)
}
//go:noinline
func arithConstShift_ssa(x int64) int64 {
	// Shift count exceeds the operand width; per the Go spec this yields
	// 0 for non-negative x and -1 for negative x.
	return x >> 100
}
// testArithConstShift tests that right shift by large constants preserve
// the sign of the input.
func testArithConstShift() {
	// -1 >> 100 must stay -1 (sign preserved); 1 >> 100 must become 0.
	if got, want := arithConstShift_ssa(-1), int64(-1); want != got {
		println("arithConstShift_ssa(-1) failed, wanted", want, "got", got)
		failed = true
	}
	if got, want := arithConstShift_ssa(1), int64(0); want != got {
		println("arithConstShift_ssa(1) failed, wanted", want, "got", got)
		failed = true
	}
}
// overflowConstShift_ssa verifies that constant folding for shift
// doesn't wrap (i.e. x << MAX_INT << 1 doesn't get folded to x << 0).
//go:noinline
func overflowConstShift64_ssa(x int64) int64 {
	// The first shift already clears every bit, so the result must be 0.
	return x << uint64(0xffffffffffffffff) << uint64(1)
}
//go:noinline
func overflowConstShift32_ssa(x int64) int32 {
	// 32-bit variant: the combined shift must fold to 0, not wrap to 0 bits.
	return int32(x) << uint32(0xffffffff) << uint32(1)
}
//go:noinline
func overflowConstShift16_ssa(x int64) int16 {
	// 16-bit variant: the combined shift must fold to 0, not wrap to 0 bits.
	return int16(x) << uint16(0xffff) << uint16(1)
}
//go:noinline
func overflowConstShift8_ssa(x int64) int8 {
	// 8-bit variant: the combined shift must fold to 0, not wrap to 0 bits.
	return int8(x) << uint8(0xff) << uint8(1)
}
// testOverflowConstShift checks the overflowConstShift*_ssa helpers over a
// range of inputs; every result must be 0 because the first (oversized)
// shift already discards all bits.
func testOverflowConstShift() {
	want := int64(0)
	for x := int64(-127); x < int64(127); x++ {
		got := overflowConstShift64_ssa(x)
		if want != got {
			fmt.Printf("overflowShift64 failed, wanted %d got %d\n", want, got)
			// Fixed: previously mismatches were printed but failed was never
			// set, so main would not panic on a regression here, unlike
			// every other check in this file.
			failed = true
		}
		got = int64(overflowConstShift32_ssa(x))
		if want != got {
			fmt.Printf("overflowShift32 failed, wanted %d got %d\n", want, got)
			failed = true
		}
		got = int64(overflowConstShift16_ssa(x))
		if want != got {
			fmt.Printf("overflowShift16 failed, wanted %d got %d\n", want, got)
			failed = true
		}
		got = int64(overflowConstShift8_ssa(x))
		if want != got {
			fmt.Printf("overflowShift8 failed, wanted %d got %d\n", want, got)
			failed = true
		}
	}
}
// test64BitConstMult tests that rewrite rules don't fold 64 bit constants
// into multiply instructions.
func test64BitConstMult() {
	const want = int64(103079215109)
	got := test64BitConstMult_ssa(1, 2)
	if got != want {
		println("test64BitConstMult failed, wanted", want, "got", got)
		failed = true
	}
}
//go:noinline
func test64BitConstMult_ssa(a, b int64) int64 {
	// Keep the 64-bit constants as explicit multiply operands; int64
	// arithmetic wraps mod 2^64, so summing the two products separately
	// is equivalent to the single expression.
	lhs := 34359738369 * a
	rhs := b * 34359738370
	return lhs + rhs
}
// test64BitConstAdd tests that rewrite rules don't fold 64 bit constants
// into add instructions.
func test64BitConstAdd() {
	const want = int64(3567671782835376650)
	got := test64BitConstAdd_ssa(1, 2)
	if got != want {
		println("test64BitConstAdd failed, wanted", want, "got", got)
		failed = true
	}
}
//go:noinline
func test64BitConstAdd_ssa(a, b int64) int64 {
	// Two distinct 64-bit addends; int64 addition wraps mod 2^64, so
	// pairwise grouping is equivalent to the original left-to-right sum.
	x := a + 575815584948629622
	y := b + 2991856197886747025
	return x + y
}
// testRegallocCVSpill tests that regalloc spills a value whose last use is the
// current value.
func testRegallocCVSpill() {
	want := int8(-9)
	got := testRegallocCVSpill_ssa(1, 2, 3, 4)
	if got != want {
		println("testRegallocCVSpill failed, wanted", want, "got", got)
		failed = true
	}
}
//go:noinline
func testRegallocCVSpill_ssa(a, b, c, d int8) int8 {
	// int8 arithmetic wraps mod 256, so splitting out the product term
	// leaves the result identical to the original single expression.
	prod := 63 * c * -87 * d
	return a + -32 + b + prod
}
func testBitwiseLogic() {
a, b := uint32(57623283), uint32(1314713839)
if want, got := uint32(38551779), testBitwiseAnd_ssa(a, b); want != got {
println("testBitwiseAnd failed, wanted", want, "got", got)
failed = true
}
if want, got := uint32(1333785343), testBitwiseOr_ssa(a, b); want != got {
println("testBitwiseOr failed, wanted", want, "got", got)
failed = true
}
if want, got := uint32(1295233564), testBitwiseXor_ssa(a, b); want != got {
println("testBitwiseXor failed, wanted", want, "got", got)
failed = true
}
if want, got := int32(832), testBitwiseLsh_ssa(13, 4, 2); want != got {
println("testBitwiseLsh failed, wanted", want, "got", got)
failed = true
}
if want, got := int32(0), testBitwiseLsh_ssa(13, 25, 15); want != got {
println("testBitwiseLsh failed, wanted", want, "got", got)
failed = true
}
if want, got := int32(0), testBitwiseLsh_ssa(-13, 25, 15); want != got {
println("testBitwiseLsh failed, wanted", want, "got", got)
failed = true
}
if want, got := int32(-13), testBitwiseRsh_ssa(-832, 4, 2); want != got {
println("testBitwiseRsh failed, wanted", want, "got", got)
failed = true
}
if want, got := int32(0), testBitwiseRsh_ssa(13, 25, 15); want != got {
println("testBitwiseRsh failed, wanted", want, "got", got)
failed = true
}
if want, got := int32(-1), testBitwiseRsh_ssa(-13, 25, 15); want != got {
println("testBitwiseRsh failed, wanted", want, "got", got)
failed = true
}
if want, got := uint32(0x3ffffff), testBitwiseRshU_ssa(0xffffffff, 4, 2); want != got {
println("testBitwiseRshU failed, wanted", want, "got", got)
failed = true
}
if want, got := uint32(0), testBitwiseRshU_ssa(13, 25, 15); want != got {
println("testBitwiseRshU failed, wanted", want, "got", got)
failed = true
}
if want, got := uint32(0), testBitwiseRshU_ssa(0x8aaaaaaa, 25, 15); want != got {
println("testBitwiseRshU failed, wanted", want, "got", got)
failed = true
}
}
//go:noinline
func testBitwiseAnd_ssa(a, b uint32) uint32 {
return a & b
}
//go:noinline
func testBitwiseOr_ssa(a, b uint32) uint32 {
return a | b
}
//go:noinline
func testBitwiseXor_ssa(a, b uint32) uint32 {
return a ^ b
}
//go:noinline
func testBitwiseLsh_ssa(a int32, b, c uint32) int32 {
return a << b << c
}
//go:noinline
func testBitwiseRsh_ssa(a int32, b, c uint32) int32 {
return a >> b >> c
}
//go:noinline
func testBitwiseRshU_ssa(a uint32, b, c uint32) uint32 {
return a >> b >> c
}
// testShiftCX_ssa evaluates a machine-generated tangle of shifts mixed
// into larger expressions; the name suggests it targets shift-by-CL
// (count-in-CX) code generation on x86 — TODO confirm against the
// generator. The expressions are intentionally convoluted; do NOT
// simplify them, or the compiler paths under test may no longer be
// exercised. testShiftCX expects the result 141.
//go:noinline
func testShiftCX_ssa() int {
	v1 := uint8(3)
	v4 := (v1 * v1) ^ v1 | v1 - v1 - v1&v1 ^ uint8(3+2) + v1*1>>0 - v1 | 1 | v1<<(2*3|0-0*0^1)
	v5 := v4>>(3-0-uint(3)) | v1 | v1 + v1 ^ v4<<(0+1|3&1)<<(uint64(1)<<0*2*0<<0) ^ v1
	v6 := v5 ^ (v1+v1)*v1 | v1 | v1*v1>>(v1&v1)>>(uint(1)<<0*uint(3)>>1)*v1<<2*v1<<v1 - v1>>2 | (v4 - v1) ^ v1 + v1 ^ v1>>1 | v1 + v1 - v1 ^ v1
	v7 := v6 & v5 << 0
	v1++
	v11 := 2&1 ^ 0 + 3 | int(0^0)<<1>>(1*0*3) ^ 0*0 ^ 3&0*3&3 ^ 3*3 ^ 1 ^ int(2)<<(2*3) + 2 | 2 | 2 ^ 2 + 1 | 3 | 0 ^ int(1)>>1 ^ 2 // int
	v7--
	return int(uint64(2*1)<<(3-2)<<uint(3>>v7)-2)&v11 | v11 - int(2)<<0>>(2-1)*(v11*0&v11<<1<<(uint8(2)+v4))
}
func testShiftCX() {
want := 141
if got := testShiftCX_ssa(); want != got {
println("testShiftCX failed, wanted", want, "got", got)
failed = true
}
}
// testSubqToNegq ensures that the SUBQ -> NEGQ translation works correctly.
func testSubqToNegq() {
want := int64(-318294940372190156)
if got := testSubqToNegq_ssa(1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2); want != got {
println("testSubqToNegq failed, wanted", want, "got", got)
failed = true
}
}
//go:noinline
func testSubqToNegq_ssa(a, b, c, d, e, f, g, h, i, j, k int64) int64 {
return a + 8207351403619448057 - b - 1779494519303207690 + c*8810076340510052032*d - 4465874067674546219 - e*4361839741470334295 - f + 8688847565426072650*g*8065564729145417479
}
func testOcom() {
want1, want2 := int32(0x55555555), int32(-0x55555556)
if got1, got2 := testOcom_ssa(0x55555555, 0x55555555); want1 != got1 || want2 != got2 {
println("testSubqToNegq failed, wanted", want1, "and", want2,
"got", got1, "and", got2)
failed = true
}
}
//go:noinline
func testOcom_ssa(a, b int32) (int32, int32) {
return ^^^^a, ^^^^^b
}
// lrot1_ssa left-rotates each argument by a constant amount (w by 5,
// x by 13, y by 29, z by 61), written as the shift/or pattern that the
// compiler's rotate-recognition rewrite rules are expected to match.
func lrot1_ssa(w uint8, x uint16, y uint32, z uint64) (a uint8, b uint16, c uint32, d uint64) {
	a = (w << 5) | (w >> 3)
	b = (x << 13) | (x >> 3)
	c = (y << 29) | (y >> 3)
	d = (z << 61) | (z >> 3)
	return
}
//go:noinline
func lrot2_ssa(w, n uint32) uint32 {
// Want to be sure that a "rotate by 32" which
// is really 0 | (w >> 0) == w
// is correctly compiled.
return (w << n) | (w >> (32 - n))
}
//go:noinline
func lrot3_ssa(w uint32) uint32 {
// Want to be sure that a "rotate by 32" which
// is really 0 | (w >> 0) == w
// is correctly compiled.
return (w << 32) | (w >> (32 - 32))
}
func testLrot() {
wantA, wantB, wantC, wantD := uint8(0xe1), uint16(0xe001),
uint32(0xe0000001), uint64(0xe000000000000001)
a, b, c, d := lrot1_ssa(0xf, 0xf, 0xf, 0xf)
if a != wantA || b != wantB || c != wantC || d != wantD {
println("lrot1_ssa(0xf, 0xf, 0xf, 0xf)=",
wantA, wantB, wantC, wantD, ", got", a, b, c, d)
failed = true
}
x := lrot2_ssa(0xb0000001, 32)
wantX := uint32(0xb0000001)
if x != wantX {
println("lrot2_ssa(0xb0000001, 32)=",
wantX, ", got", x)
failed = true
}
x = lrot3_ssa(0xb0000001)
if x != wantX {
println("lrot3_ssa(0xb0000001)=",
wantX, ", got", x)
failed = true
}
}
//go:noinline
func sub1_ssa() uint64 {
v1 := uint64(3) // uint64
return v1*v1 - (v1&v1)&v1
}
// sub2_ssa runs a chain of small uint8 arithmetic that testSubConst
// expects to evaluate to 251. The empty switch is deliberate (it appears
// throughout these tests, presumably to influence how the function is
// compiled — confirm before removing), and the decrement after v3 is
// computed is part of the scenario, per the comment below.
func sub2_ssa() uint8 {
	switch {
	}
	v1 := uint8(0)
	v3 := v1 + v1 + v1 ^ v1 | 3 + v1 ^ v1 | v1 ^ v1
	v1-- // dev.ssa doesn't see this one
	return v1 ^ v1*v1 - v3
}
func testSubConst() {
x1 := sub1_ssa()
want1 := uint64(6)
if x1 != want1 {
println("sub1_ssa()=", want1, ", got", x1)
failed = true
}
x2 := sub2_ssa()
want2 := uint8(251)
if x2 != want2 {
println("sub2_ssa()=", want2, ", got", x2)
failed = true
}
}
//go:noinline
func orPhi_ssa(a bool, x int) int {
v := 0
if a {
v = -1
} else {
v = -1
}
return x | v
}
func testOrPhi() {
if want, got := -1, orPhi_ssa(true, 4); got != want {
println("orPhi_ssa(true, 4)=", got, " want ", want)
}
if want, got := -1, orPhi_ssa(false, 0); got != want {
println("orPhi_ssa(false, 0)=", got, " want ", want)
}
}
var failed = false
func main() {
test64BitConstMult()
test64BitConstAdd()
testRegallocCVSpill()
testSubqToNegq()
testBitwiseLogic()
testOcom()
testLrot()
testShiftCX()
testSubConst()
testOverflowConstShift()
testArithConstShift()
testArithRshConst()
testLargeConst()
if failed {
panic("failed")
}
}

View File

@ -0,0 +1,142 @@
package main
var failed = false
//go:noinline
func testSliceLenCap12_ssa(a [10]int, i, j int) (int, int) {
b := a[i:j]
return len(b), cap(b)
}
//go:noinline
func testSliceLenCap1_ssa(a [10]int, i, j int) (int, int) {
b := a[i:]
return len(b), cap(b)
}
//go:noinline
func testSliceLenCap2_ssa(a [10]int, i, j int) (int, int) {
b := a[:j]
return len(b), cap(b)
}
// testSliceLenCap checks len and cap of the slices produced by the three
// array-slicing helpers against a table of expected values.
//
// Fix: the comparison used &&, so a case was only reported when BOTH len
// and cap were wrong; a mismatch in either one is a failure, so use ||.
func testSliceLenCap() {
	a := [10]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	tests := [...]struct {
		fn   func(a [10]int, i, j int) (int, int)
		i, j int // slice range
		l, c int // len, cap
	}{
		// -1 means the value is not used.
		{testSliceLenCap12_ssa, 0, 0, 0, 10},
		{testSliceLenCap12_ssa, 0, 1, 1, 10},
		{testSliceLenCap12_ssa, 0, 10, 10, 10},
		{testSliceLenCap12_ssa, 10, 10, 0, 0},
		{testSliceLenCap12_ssa, 0, 5, 5, 10},
		{testSliceLenCap12_ssa, 5, 5, 0, 5},
		{testSliceLenCap12_ssa, 5, 10, 5, 5},
		{testSliceLenCap1_ssa, 0, -1, 0, 10},
		{testSliceLenCap1_ssa, 5, -1, 5, 5},
		{testSliceLenCap1_ssa, 10, -1, 0, 0},
		{testSliceLenCap2_ssa, -1, 0, 0, 10},
		{testSliceLenCap2_ssa, -1, 5, 5, 10},
		{testSliceLenCap2_ssa, -1, 10, 10, 10},
	}
	for i, t := range tests {
		if l, c := t.fn(a, t.i, t.j); l != t.l || c != t.c {
			println("#", i, " len(a[", t.i, ":", t.j, "]), cap(a[", t.i, ":", t.j, "]) =", l, c,
				", want", t.l, t.c)
			failed = true
		}
	}
}
//go:noinline
func testSliceGetElement_ssa(a [10]int, i, j, p int) int {
return a[i:j][p]
}
func testSliceGetElement() {
a := [10]int{0, 10, 20, 30, 40, 50, 60, 70, 80, 90}
tests := [...]struct {
i, j, p int
want int // a[i:j][p]
}{
{0, 10, 2, 20},
{0, 5, 4, 40},
{5, 10, 3, 80},
{1, 9, 7, 80},
}
for i, t := range tests {
if got := testSliceGetElement_ssa(a, t.i, t.j, t.p); got != t.want {
println("#", i, " a[", t.i, ":", t.j, "][", t.p, "] = ", got, " wanted ", t.want)
failed = true
}
}
}
//go:noinline
func testSliceSetElement_ssa(a *[10]int, i, j, p, x int) {
(*a)[i:j][p] = x
}
func testSliceSetElement() {
a := [10]int{0, 10, 20, 30, 40, 50, 60, 70, 80, 90}
tests := [...]struct {
i, j, p int
want int // a[i:j][p]
}{
{0, 10, 2, 17},
{0, 5, 4, 11},
{5, 10, 3, 28},
{1, 9, 7, 99},
}
for i, t := range tests {
testSliceSetElement_ssa(&a, t.i, t.j, t.p, t.want)
if got := a[t.i+t.p]; got != t.want {
println("#", i, " a[", t.i, ":", t.j, "][", t.p, "] = ", got, " wanted ", t.want)
failed = true
}
}
}
func testSlicePanic1() {
defer func() {
if r := recover(); r != nil {
println("paniced as expected")
}
}()
a := [10]int{0, 10, 20, 30, 40, 50, 60, 70, 80, 90}
testSliceLenCap12_ssa(a, 3, 12)
println("expected to panic, but didn't")
failed = true
}
func testSlicePanic2() {
defer func() {
if r := recover(); r != nil {
println("paniced as expected")
}
}()
a := [10]int{0, 10, 20, 30, 40, 50, 60, 70, 80, 90}
testSliceGetElement_ssa(a, 3, 7, 4)
println("expected to panic, but didn't")
failed = true
}
func main() {
testSliceLenCap()
testSliceGetElement()
testSliceSetElement()
testSlicePanic1()
testSlicePanic2()
if failed {
panic("failed")
}
}

View File

@ -0,0 +1,147 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests type assertion expressions and statements
package main
import (
"fmt"
"runtime"
)
type (
S struct{}
T struct{}
I interface {
F()
}
)
var (
s *S
t *T
)
func (s *S) F() {}
func (t *T) F() {}
func e2t_ssa(e interface{}) *T {
return e.(*T)
}
func i2t_ssa(i I) *T {
return i.(*T)
}
func testAssertE2TOk() {
if got := e2t_ssa(t); got != t {
fmt.Printf("e2t_ssa(t)=%v want %v", got, t)
failed = true
}
}
// testAssertE2TPanic checks that a failed empty-interface type assertion
// in e2t_ssa panics with a *runtime.TypeAssertionError carrying the
// expected message.
//
// Fix: when the recovered value was not a *runtime.TypeAssertionError,
// err was nil and the subsequent err.Error() call nil-dereferenced inside
// the deferred function, masking the real failure; return early instead.
func testAssertE2TPanic() {
	var got *T
	defer func() {
		if got != nil {
			fmt.Printf("e2t_ssa(s)=%v want nil", got)
			failed = true
		}
		e := recover()
		err, ok := e.(*runtime.TypeAssertionError)
		if !ok {
			fmt.Printf("e2t_ssa(s) panic type %T", e)
			failed = true
			// err is nil here; calling err.Error() would panic again.
			return
		}
		want := "interface conversion: interface {} is *main.S, not *main.T"
		if err.Error() != want {
			fmt.Printf("e2t_ssa(s) wrong error, want '%s', got '%s'\n", want, err.Error())
			failed = true
		}
	}()
	got = e2t_ssa(s)
	fmt.Printf("e2t_ssa(s) should panic")
	failed = true
}
func testAssertI2TOk() {
if got := i2t_ssa(t); got != t {
fmt.Printf("i2t_ssa(t)=%v want %v", got, t)
failed = true
}
}
// testAssertI2TPanic checks that a failed non-empty-interface type
// assertion in i2t_ssa panics with a *runtime.TypeAssertionError carrying
// the expected message.
//
// Fix: when the recovered value was not a *runtime.TypeAssertionError,
// err was nil and the subsequent err.Error() call nil-dereferenced inside
// the deferred function, masking the real failure; return early instead.
func testAssertI2TPanic() {
	var got *T
	defer func() {
		if got != nil {
			fmt.Printf("i2t_ssa(s)=%v want nil", got)
			failed = true
		}
		e := recover()
		err, ok := e.(*runtime.TypeAssertionError)
		if !ok {
			fmt.Printf("i2t_ssa(s) panic type %T", e)
			failed = true
			// err is nil here; calling err.Error() would panic again.
			return
		}
		want := "interface conversion: main.I is *main.S, not *main.T"
		if err.Error() != want {
			fmt.Printf("i2t_ssa(s) wrong error, want '%s', got '%s'\n", want, err.Error())
			failed = true
		}
	}()
	got = i2t_ssa(s)
	fmt.Printf("i2t_ssa(s) should panic")
	failed = true
}
func e2t2_ssa(e interface{}) (*T, bool) {
t, ok := e.(*T)
return t, ok
}
func i2t2_ssa(i I) (*T, bool) {
t, ok := i.(*T)
return t, ok
}
func testAssertE2T2() {
if got, ok := e2t2_ssa(t); !ok || got != t {
fmt.Printf("e2t2_ssa(t)=(%v, %v) want (%v, %v)", got, ok, t, true)
failed = true
}
if got, ok := e2t2_ssa(s); ok || got != nil {
fmt.Printf("e2t2_ssa(s)=(%v, %v) want (%v, %v)", got, ok, nil, false)
failed = true
}
}
func testAssertI2T2() {
if got, ok := i2t2_ssa(t); !ok || got != t {
fmt.Printf("i2t2_ssa(t)=(%v, %v) want (%v, %v)", got, ok, t, true)
failed = true
}
if got, ok := i2t2_ssa(s); ok || got != nil {
fmt.Printf("i2t2_ssa(s)=(%v, %v) want (%v, %v)", got, ok, nil, false)
failed = true
}
}
var failed = false
func main() {
testAssertE2TOk()
testAssertE2TPanic()
testAssertI2TOk()
testAssertI2TPanic()
testAssertE2T2()
testAssertI2T2()
if failed {
panic("failed")
}
}

View File

@ -0,0 +1,255 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests continue and break.
package main
func continuePlain_ssa() int {
var n int
for i := 0; i < 10; i++ {
if i == 6 {
continue
}
n = i
}
return n
}
func continueLabeled_ssa() int {
var n int
Next:
for i := 0; i < 10; i++ {
if i == 6 {
continue Next
}
n = i
}
return n
}
func continuePlainInner_ssa() int {
var n int
for j := 0; j < 30; j += 10 {
for i := 0; i < 10; i++ {
if i == 6 {
continue
}
n = i
}
n += j
}
return n
}
func continueLabeledInner_ssa() int {
var n int
for j := 0; j < 30; j += 10 {
Next:
for i := 0; i < 10; i++ {
if i == 6 {
continue Next
}
n = i
}
n += j
}
return n
}
func continueLabeledOuter_ssa() int {
var n int
Next:
for j := 0; j < 30; j += 10 {
for i := 0; i < 10; i++ {
if i == 6 {
continue Next
}
n = i
}
n += j
}
return n
}
func breakPlain_ssa() int {
var n int
for i := 0; i < 10; i++ {
if i == 6 {
break
}
n = i
}
return n
}
func breakLabeled_ssa() int {
var n int
Next:
for i := 0; i < 10; i++ {
if i == 6 {
break Next
}
n = i
}
return n
}
func breakPlainInner_ssa() int {
var n int
for j := 0; j < 30; j += 10 {
for i := 0; i < 10; i++ {
if i == 6 {
break
}
n = i
}
n += j
}
return n
}
func breakLabeledInner_ssa() int {
var n int
for j := 0; j < 30; j += 10 {
Next:
for i := 0; i < 10; i++ {
if i == 6 {
break Next
}
n = i
}
n += j
}
return n
}
func breakLabeledOuter_ssa() int {
var n int
Next:
for j := 0; j < 30; j += 10 {
for i := 0; i < 10; i++ {
if i == 6 {
break Next
}
n = i
}
n += j
}
return n
}
var g, h int // globals to ensure optimizations don't collapse our switch statements
// switchPlain_ssa returns 1 when g == 0: the plain break exits the
// switch before the statement that follows it. The unreachable n = 2
// after break is the point of the test (break inside a plain switch);
// do not "clean it up".
func switchPlain_ssa() int {
	var n int
	switch g {
	case 0:
		n = 1
		break
		n = 2
	}
	return n
}
func switchLabeled_ssa() int {
var n int
Done:
switch g {
case 0:
n = 1
break Done
n = 2
}
return n
}
func switchPlainInner_ssa() int {
var n int
switch g {
case 0:
n = 1
switch h {
case 0:
n += 10
break
}
n = 2
}
return n
}
func switchLabeledInner_ssa() int {
var n int
switch g {
case 0:
n = 1
Done:
switch h {
case 0:
n += 10
break Done
}
n = 2
}
return n
}
func switchLabeledOuter_ssa() int {
var n int
Done:
switch g {
case 0:
n = 1
switch h {
case 0:
n += 10
break Done
}
n = 2
}
return n
}
// main runs every continue/break/switch test and panics if any produced
// an unexpected result.
//
// Fix: the original condition was `test.fn() != test.want`, invoking each
// test a second time and comparing that second result while printing the
// first (`got`). Compare the single captured result instead.
func main() {
	tests := [...]struct {
		name string
		fn   func() int
		want int
	}{
		{"continuePlain_ssa", continuePlain_ssa, 9},
		{"continueLabeled_ssa", continueLabeled_ssa, 9},
		{"continuePlainInner_ssa", continuePlainInner_ssa, 29},
		{"continueLabeledInner_ssa", continueLabeledInner_ssa, 29},
		{"continueLabeledOuter_ssa", continueLabeledOuter_ssa, 5},
		{"breakPlain_ssa", breakPlain_ssa, 5},
		{"breakLabeled_ssa", breakLabeled_ssa, 5},
		{"breakPlainInner_ssa", breakPlainInner_ssa, 25},
		{"breakLabeledInner_ssa", breakLabeledInner_ssa, 25},
		{"breakLabeledOuter_ssa", breakLabeledOuter_ssa, 5},
		{"switchPlain_ssa", switchPlain_ssa, 1},
		{"switchLabeled_ssa", switchLabeled_ssa, 1},
		{"switchPlainInner_ssa", switchPlainInner_ssa, 2},
		{"switchLabeledInner_ssa", switchLabeledInner_ssa, 2},
		{"switchLabeledOuter_ssa", switchLabeledOuter_ssa, 11},
		// no select tests; they're identical to switch
	}
	var failed bool
	for _, test := range tests {
		if got := test.fn(); got != test.want {
			print(test.name, "()=", got, ", want ", test.want, "\n")
			failed = true
		}
	}
	if failed {
		panic("failed")
	}
}

View File

@ -0,0 +1,73 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// chan_ssa.go tests chan operations.
package main
import "fmt"
var failed = false
//go:noinline
func lenChan_ssa(v chan int) int {
return len(v)
}
//go:noinline
func capChan_ssa(v chan int) int {
return cap(v)
}
func testLenChan() {
v := make(chan int, 10)
v <- 1
v <- 1
v <- 1
if want, got := 3, lenChan_ssa(v); got != want {
fmt.Printf("expected len(chan) = %d, got %d", want, got)
failed = true
}
}
func testLenNilChan() {
var v chan int
if want, got := 0, lenChan_ssa(v); got != want {
fmt.Printf("expected len(nil) = %d, got %d", want, got)
failed = true
}
}
func testCapChan() {
v := make(chan int, 25)
if want, got := 25, capChan_ssa(v); got != want {
fmt.Printf("expected cap(chan) = %d, got %d", want, got)
failed = true
}
}
func testCapNilChan() {
var v chan int
if want, got := 0, capChan_ssa(v); got != want {
fmt.Printf("expected cap(nil) = %d, got %d", want, got)
failed = true
}
}
func main() {
testLenChan()
testLenNilChan()
testCapChan()
testCapNilChan()
if failed {
panic("failed")
}
}

View File

@ -0,0 +1,38 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests closure operations (the "map_ssa.go … map operations" header was stale; nothing in this file uses maps).
package main
import "fmt"
var failed = false
// testCFunc_ssa checks that a closure capturing a local variable by
// reference observes both increments: b() runs twice, so the function
// returns 2 (see testCFunc). The empty switch inside the closure is
// deliberate (it appears throughout these tests, presumably to influence
// compilation — confirm before removing).
//go:noinline
func testCFunc_ssa() int {
	a := 0
	b := func() {
		switch {
		}
		a++
	}
	b()
	b()
	return a
}
func testCFunc() {
if want, got := 2, testCFunc_ssa(); got != want {
fmt.Printf("expected %d, got %d", want, got)
failed = true
}
}
func main() {
testCFunc()
if failed {
panic("failed")
}
}

View File

@ -0,0 +1,48 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// cmp_ssa.go tests compare simplification operations.
package main
import "fmt"
var failed = false
//go:noinline
func eq_ssa(a int64) bool {
return 4+a == 10
}
//go:noinline
func neq_ssa(a int64) bool {
return 10 != a+4
}
func testCmp() {
if wanted, got := true, eq_ssa(6); wanted != got {
fmt.Printf("eq_ssa: expected %v, got %v\n", wanted, got)
failed = true
}
if wanted, got := false, eq_ssa(7); wanted != got {
fmt.Printf("eq_ssa: expected %v, got %v\n", wanted, got)
failed = true
}
if wanted, got := false, neq_ssa(6); wanted != got {
fmt.Printf("neq_ssa: expected %v, got %v\n", wanted, got)
failed = true
}
if wanted, got := true, neq_ssa(7); wanted != got {
fmt.Printf("neq_ssa: expected %v, got %v\n", wanted, got)
failed = true
}
}
func main() {
testCmp()
if failed {
panic("failed")
}
}

View File

@ -0,0 +1,145 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test compound objects
package main
import "fmt"
func string_ssa(a, b string, x bool) string {
s := ""
if x {
s = a
} else {
s = b
}
return s
}
func testString() {
a := "foo"
b := "barz"
if want, got := a, string_ssa(a, b, true); got != want {
fmt.Printf("string_ssa(%v, %v, true) = %v, want %v\n", a, b, got, want)
failed = true
}
if want, got := b, string_ssa(a, b, false); got != want {
fmt.Printf("string_ssa(%v, %v, false) = %v, want %v\n", a, b, got, want)
failed = true
}
}
func complex64_ssa(a, b complex64, x bool) complex64 {
switch {
}
var c complex64
if x {
c = a
} else {
c = b
}
return c
}
func complex128_ssa(a, b complex128, x bool) complex128 {
switch {
}
var c complex128
if x {
c = a
} else {
c = b
}
return c
}
func testComplex64() {
var a complex64 = 1 + 2i
var b complex64 = 3 + 4i
if want, got := a, complex64_ssa(a, b, true); got != want {
fmt.Printf("complex64_ssa(%v, %v, true) = %v, want %v\n", a, b, got, want)
failed = true
}
if want, got := b, complex64_ssa(a, b, false); got != want {
fmt.Printf("complex64_ssa(%v, %v, true) = %v, want %v\n", a, b, got, want)
failed = true
}
}
func testComplex128() {
var a complex128 = 1 + 2i
var b complex128 = 3 + 4i
if want, got := a, complex128_ssa(a, b, true); got != want {
fmt.Printf("complex128_ssa(%v, %v, true) = %v, want %v\n", a, b, got, want)
failed = true
}
if want, got := b, complex128_ssa(a, b, false); got != want {
fmt.Printf("complex128_ssa(%v, %v, true) = %v, want %v\n", a, b, got, want)
failed = true
}
}
func slice_ssa(a, b []byte, x bool) []byte {
var s []byte
if x {
s = a
} else {
s = b
}
return s
}
func testSlice() {
a := []byte{3, 4, 5}
b := []byte{7, 8, 9}
if want, got := byte(3), slice_ssa(a, b, true)[0]; got != want {
fmt.Printf("slice_ssa(%v, %v, true) = %v, want %v\n", a, b, got, want)
failed = true
}
if want, got := byte(7), slice_ssa(a, b, false)[0]; got != want {
fmt.Printf("slice_ssa(%v, %v, false) = %v, want %v\n", a, b, got, want)
failed = true
}
}
func interface_ssa(a, b interface{}, x bool) interface{} {
var s interface{}
if x {
s = a
} else {
s = b
}
return s
}
func testInterface() {
a := interface{}(3)
b := interface{}(4)
if want, got := 3, interface_ssa(a, b, true).(int); got != want {
fmt.Printf("interface_ssa(%v, %v, true) = %v, want %v\n", a, b, got, want)
failed = true
}
if want, got := 4, interface_ssa(a, b, false).(int); got != want {
fmt.Printf("interface_ssa(%v, %v, false) = %v, want %v\n", a, b, got, want)
failed = true
}
}
var failed = false
func main() {
testString()
testSlice()
testInterface()
testComplex64()
testComplex128()
if failed {
panic("failed")
}
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,161 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test control flow
package main
// nor_ssa calculates NOR(a, b).
// It is implemented in a way that generates
// phi control values.
func nor_ssa(a, b bool) bool {
var c bool
if a {
c = true
}
if b {
c = true
}
if c {
return false
}
return true
}
func testPhiControl() {
tests := [...][3]bool{ // a, b, want
{false, false, true},
{true, false, false},
{false, true, false},
{true, true, false},
}
for _, test := range tests {
a, b := test[0], test[1]
got := nor_ssa(a, b)
want := test[2]
if want != got {
print("nor(", a, ", ", b, ")=", want, " got ", got, "\n")
failed = true
}
}
}
// emptyRange_ssa ranges over b (possibly empty) doing nothing with the
// elements and returns true; it checks that a range loop over an empty
// slice compiles and executes zero iterations without faulting.
func emptyRange_ssa(b []byte) bool {
	for _, x := range b {
		_ = x
	}
	return true
}
func testEmptyRange() {
if !emptyRange_ssa([]byte{}) {
println("emptyRange_ssa([]byte{})=false, want true")
failed = true
}
}
func switch_ssa(a int) int {
ret := 0
switch a {
case 5:
ret += 5
case 4:
ret += 4
case 3:
ret += 3
case 2:
ret += 2
case 1:
ret += 1
}
return ret
}
func fallthrough_ssa(a int) int {
ret := 0
switch a {
case 5:
ret++
fallthrough
case 4:
ret++
fallthrough
case 3:
ret++
fallthrough
case 2:
ret++
fallthrough
case 1:
ret++
}
return ret
}
func testFallthrough() {
for i := 0; i < 6; i++ {
if got := fallthrough_ssa(i); got != i {
println("fallthrough_ssa(i) =", got, "wanted", i)
failed = true
}
}
}
func testSwitch() {
for i := 0; i < 6; i++ {
if got := switch_ssa(i); got != i {
println("switch_ssa(i) =", got, "wanted", i)
failed = true
}
}
}
type junk struct {
step int
}
// flagOverwrite_ssa is intended to reproduce an issue seen where a XOR
// was scheduled between a compare and branch, clearing flags.
// flagOverwrite_ssa classifies c (digit -> 1, 'e'/'E' -> 2, other -> 3),
// always resetting s.step. Per the comment above, the shape of this code
// reproduced a scheduling bug where an instruction that clears the x86
// flags landed between a compare and its branch — keep the statement
// order and the empty switch exactly as written.
func flagOverwrite_ssa(s *junk, c int) int {
	switch {
	}
	if '0' <= c && c <= '9' {
		s.step = 0
		return 1
	}
	if c == 'e' || c == 'E' {
		s.step = 0
		return 2
	}
	s.step = 0
	return 3
}
func testFlagOverwrite() {
j := junk{}
if got := flagOverwrite_ssa(&j, ' '); got != 3 {
println("flagOverwrite_ssa =", got, "wanted 3")
failed = true
}
}
var failed = false
func main() {
testPhiControl()
testEmptyRange()
testSwitch()
testFallthrough()
testFlagOverwrite()
if failed {
panic("failed")
}
}

View File

@ -0,0 +1,17 @@
// compile
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test that a defer in a function with no return
// statement will compile correctly.
package foo
// deferNoReturn_ssa has a defer but no return statement: the infinite
// loop means the function never returns normally. Per the file header,
// this is a compile-only test ("// compile") — it only needs to build,
// never to run.
func deferNoReturn_ssa() {
	defer func() { println("returned") }()
	for {
		println("loop")
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,214 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This program generates a test to verify that the standard arithmetic
// operators properly handle some special cases. The test file should be
// generated with a known working version of go.
// launch with `go run arithBoundaryGen.go` a file called arithBoundary_ssa.go
// will be written into the parent directory containing the tests
package main
import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"log"
"text/template"
)
// used for interpolation in a text template
type tmplData struct {
Name, Stype, Symbol string
}
// used to work around an issue with the mod symbol being
// interpreted as part of a format string
func (s tmplData) SymFirst() string {
return string(s.Symbol[0])
}
// ucast casts an unsigned int to the size in s
func ucast(i uint64, s sizedTestData) uint64 {
switch s.name {
case "uint32":
return uint64(uint32(i))
case "uint16":
return uint64(uint16(i))
case "uint8":
return uint64(uint8(i))
}
return i
}
// icast casts a signed int to the size in s
func icast(i int64, s sizedTestData) int64 {
switch s.name {
case "int32":
return int64(int32(i))
case "int16":
return int64(int16(i))
case "int8":
return int64(int8(i))
}
return i
}
type sizedTestData struct {
name string
sn string
u []uint64
i []int64
}
// values to generate tests. these should include the smallest and largest values, along
// with any other values that might cause issues. we generate n^2 tests for each size to
// cover all cases.
var szs = []sizedTestData{
sizedTestData{name: "uint64", sn: "64", u: []uint64{0, 1, 4294967296, 0xffffFFFFffffFFFF}},
sizedTestData{name: "int64", sn: "64", i: []int64{-0x8000000000000000, -0x7FFFFFFFFFFFFFFF,
-4294967296, -1, 0, 1, 4294967296, 0x7FFFFFFFFFFFFFFE, 0x7FFFFFFFFFFFFFFF}},
sizedTestData{name: "uint32", sn: "32", u: []uint64{0, 1, 4294967295}},
sizedTestData{name: "int32", sn: "32", i: []int64{-0x80000000, -0x7FFFFFFF, -1, 0,
1, 0x7FFFFFFF}},
sizedTestData{name: "uint16", sn: "16", u: []uint64{0, 1, 65535}},
sizedTestData{name: "int16", sn: "16", i: []int64{-32768, -32767, -1, 0, 1, 32766, 32767}},
sizedTestData{name: "uint8", sn: "8", u: []uint64{0, 1, 255}},
sizedTestData{name: "int8", sn: "8", i: []int64{-128, -127, -1, 0, 1, 126, 127}},
}
type op struct {
name, symbol string
}
// ops that we will be generating tests for
var ops = []op{op{"add", "+"}, op{"sub", "-"}, op{"div", "/"}, op{"mod", "%%"}, op{"mul", "*"}}
func main() {
w := new(bytes.Buffer)
fmt.Fprintf(w, "package main;\n")
fmt.Fprintf(w, "import \"fmt\"\n")
for _, sz := range []int{64, 32, 16, 8} {
fmt.Fprintf(w, "type utd%d struct {\n", sz)
fmt.Fprintf(w, " a,b uint%d\n", sz)
fmt.Fprintf(w, " add,sub,mul,div,mod uint%d\n", sz)
fmt.Fprintf(w, "}\n")
fmt.Fprintf(w, "type itd%d struct {\n", sz)
fmt.Fprintf(w, " a,b int%d\n", sz)
fmt.Fprintf(w, " add,sub,mul,div,mod int%d\n", sz)
fmt.Fprintf(w, "}\n")
}
// the function being tested
testFunc, err := template.New("testFunc").Parse(
`//go:noinline
func {{.Name}}_{{.Stype}}_ssa(a, b {{.Stype}}) {{.Stype}} {
return a {{.SymFirst}} b
}
`)
if err != nil {
panic(err)
}
// generate our functions to be tested
for _, s := range szs {
for _, o := range ops {
fd := tmplData{o.name, s.name, o.symbol}
err = testFunc.Execute(w, fd)
if err != nil {
panic(err)
}
}
}
// generate the test data
for _, s := range szs {
if len(s.u) > 0 {
fmt.Fprintf(w, "var %s_data []utd%s = []utd%s{", s.name, s.sn, s.sn)
for _, i := range s.u {
for _, j := range s.u {
fmt.Fprintf(w, "utd%s{a: %d, b: %d, add: %d, sub: %d, mul: %d", s.sn, i, j, ucast(i+j, s), ucast(i-j, s), ucast(i*j, s))
if j != 0 {
fmt.Fprintf(w, ", div: %d, mod: %d", ucast(i/j, s), ucast(i%j, s))
}
fmt.Fprint(w, "},\n")
}
}
fmt.Fprintf(w, "}\n")
} else {
// TODO: clean up this duplication
fmt.Fprintf(w, "var %s_data []itd%s = []itd%s{", s.name, s.sn, s.sn)
for _, i := range s.i {
for _, j := range s.i {
fmt.Fprintf(w, "itd%s{a: %d, b: %d, add: %d, sub: %d, mul: %d", s.sn, i, j, icast(i+j, s), icast(i-j, s), icast(i*j, s))
if j != 0 {
fmt.Fprintf(w, ", div: %d, mod: %d", icast(i/j, s), icast(i%j, s))
}
fmt.Fprint(w, "},\n")
}
}
fmt.Fprintf(w, "}\n")
}
}
fmt.Fprintf(w, "var failed bool\n\n")
fmt.Fprintf(w, "func main() {\n\n")
verify, err := template.New("tst").Parse(
`if got := {{.Name}}_{{.Stype}}_ssa(v.a, v.b); got != v.{{.Name}} {
fmt.Printf("{{.Name}}_{{.Stype}} %d{{.Symbol}}%d = %d, wanted %d\n",v.a,v.b,got,v.{{.Name}})
failed = true
}
`)
for _, s := range szs {
fmt.Fprintf(w, "for _, v := range %s_data {\n", s.name)
for _, o := range ops {
// avoid generating tests that divide by zero
if o.name == "div" || o.name == "mod" {
fmt.Fprint(w, "if v.b != 0 {")
}
err = verify.Execute(w, tmplData{o.name, s.name, o.symbol})
if o.name == "div" || o.name == "mod" {
fmt.Fprint(w, "\n}\n")
}
if err != nil {
panic(err)
}
}
fmt.Fprint(w, " }\n")
}
fmt.Fprintf(w, `if failed {
panic("tests failed")
}
`)
fmt.Fprintf(w, "}\n")
// gofmt result
b := w.Bytes()
src, err := format.Source(b)
if err != nil {
fmt.Printf("%s\n", b)
panic(err)
}
// write to file
err = ioutil.WriteFile("../arithBoundary_ssa.go", src, 0666)
if err != nil {
log.Fatalf("can't write output: %v\n", err)
}
}

View File

@ -0,0 +1,294 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This program generates a test to verify that the standard arithmetic
// operators properly handle const cases. The test file should be
// generated with a known working version of go.
// launch with `go run arithConstGen.go` a file called arithConst_ssa.go
// will be written into the parent directory containing the tests
package main
import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"log"
"strings"
"text/template"
)
type op struct {
name, symbol string
}
type szD struct {
name string
sn string
u []uint64
i []int64
}
var szs []szD = []szD{
szD{name: "uint64", sn: "64", u: []uint64{0, 1, 4294967296, 0xffffFFFFffffFFFF}},
szD{name: "int64", sn: "64", i: []int64{-0x8000000000000000, -0x7FFFFFFFFFFFFFFF,
-4294967296, -1, 0, 1, 4294967296, 0x7FFFFFFFFFFFFFFE, 0x7FFFFFFFFFFFFFFF}},
szD{name: "uint32", sn: "32", u: []uint64{0, 1, 4294967295}},
szD{name: "int32", sn: "32", i: []int64{-0x80000000, -0x7FFFFFFF, -1, 0,
1, 0x7FFFFFFF}},
szD{name: "uint16", sn: "16", u: []uint64{0, 1, 65535}},
szD{name: "int16", sn: "16", i: []int64{-32768, -32767, -1, 0, 1, 32766, 32767}},
szD{name: "uint8", sn: "8", u: []uint64{0, 1, 255}},
szD{name: "int8", sn: "8", i: []int64{-128, -127, -1, 0, 1, 126, 127}},
}
var ops []op = []op{op{"add", "+"}, op{"sub", "-"}, op{"div", "/"}, op{"mul", "*"},
op{"lsh", "<<"}, op{"rsh", ">>"}}
// ansU formats the value of i <op> j as decimal text, after truncating
// the result to the unsigned type named by t ("uint8", "uint16",
// "uint32"; anything else is left at full 64-bit width).
// Division by zero is sidestepped: the result stays 0 when j == 0.
func ansU(i, j uint64, t, op string) string {
	r := uint64(0)
	switch op {
	case "+":
		r = i + j
	case "-":
		r = i - j
	case "*":
		r = i * j
	case "/":
		if j != 0 {
			r = i / j
		}
	case "<<":
		r = i << j
	case ">>":
		r = i >> j
	}
	// Truncate to the requested width; "uint64" needs no adjustment.
	switch t {
	case "uint8":
		r = uint64(uint8(r))
	case "uint16":
		r = uint64(uint16(r))
	case "uint32":
		r = uint64(uint32(r))
	}
	return fmt.Sprintf("%d", r)
}
// ansS formats the value of i <op> j as decimal text, after truncating
// the result to the signed type named by t ("int8", "int16", "int32";
// anything else is left at full 64-bit width).
// Division by zero is sidestepped: the result stays 0 when j == 0.
// Shift counts are reinterpreted as uint64, matching the generator's use.
func ansS(i, j int64, t, op string) string {
	r := int64(0)
	switch op {
	case "+":
		r = i + j
	case "-":
		r = i - j
	case "*":
		r = i * j
	case "/":
		if j != 0 {
			r = i / j
		}
	case "<<":
		r = i << uint64(j)
	case ">>":
		r = i >> uint64(j)
	}
	// Truncate to the requested width; "int64" needs no adjustment.
	switch t {
	case "int8":
		r = int64(int8(r))
	case "int16":
		r = int64(int16(r))
	case "int32":
		r = int64(int32(r))
	}
	return fmt.Sprintf("%d", r)
}
// main writes ../arithConst_ssa.go: for every size class in szs and every
// operator in ops it emits a pair of //go:noinline functions ("a op const"
// and "const op a"), plus a generated main that compares each call against
// the expected value computed here by ansU/ansS.
func main() {
    w := new(bytes.Buffer)
    fmt.Fprintf(w, "package main;\n")
    fmt.Fprintf(w, "import \"fmt\"\n")

    // fncCnst1: constant on the right of the operator (a op N).
    fncCnst1, err := template.New("fnc").Parse(
        `//go:noinline
func {{.Name}}_{{.Type_}}_{{.FNumber}}_ssa(a {{.Type_}}) {{.Type_}} {
return a {{.Symbol}} {{.Number}}
}
`)
    if err != nil {
        panic(err)
    }
    // fncCnst2: constant on the left of the operator (N op a).
    fncCnst2, err := template.New("fnc").Parse(
        `//go:noinline
func {{.Name}}_{{.FNumber}}_{{.Type_}}_ssa(a {{.Type_}}) {{.Type_}} {
return {{.Number}} {{.Symbol}} a
}
`)
    if err != nil {
        panic(err)
    }

    type fncData struct {
        Name, Type_, Symbol, FNumber, Number string
    }

    for _, s := range szs {
        for _, o := range ops {
            fd := fncData{o.name, s.name, o.symbol, "", ""}
            // unsigned test cases
            if len(s.u) > 0 {
                for _, i := range s.u {
                    fd.Number = fmt.Sprintf("%d", i)
                    // FNumber is the constant made identifier-safe:
                    // "-" is not legal inside a Go function name.
                    fd.FNumber = strings.Replace(fd.Number, "-", "Neg", -1)

                    // avoid division by zero
                    if o.name != "div" || i != 0 {
                        fncCnst1.Execute(w, fd)
                    }
                    fncCnst2.Execute(w, fd)
                }
            }
            // signed test cases
            if len(s.i) > 0 {
                // don't generate tests for shifts by signed integers
                if o.name == "lsh" || o.name == "rsh" {
                    continue
                }
                for _, i := range s.i {
                    fd.Number = fmt.Sprintf("%d", i)
                    fd.FNumber = strings.Replace(fd.Number, "-", "Neg", -1)

                    // avoid division by zero
                    if o.name != "div" || i != 0 {
                        fncCnst1.Execute(w, fd)
                    }
                    fncCnst2.Execute(w, fd)
                }
            }
        }
    }

    fmt.Fprintf(w, "var failed bool\n\n")
    fmt.Fprintf(w, "func main() {\n\n")

    // vrf1 checks the constant-on-the-left functions, vrf2 the
    // constant-on-the-right ones; both print and record any mismatch.
    vrf1, _ := template.New("vrf1").Parse(`
if got := {{.Name}}_{{.FNumber}}_{{.Type_}}_ssa({{.Input}}); got != {{.Ans}} {
fmt.Printf("{{.Name}}_{{.Type_}} {{.Number}}{{.Symbol}}{{.Input}} = %d, wanted {{.Ans}}\n",got)
failed = true
}
`)
    vrf2, _ := template.New("vrf2").Parse(`
if got := {{.Name}}_{{.Type_}}_{{.FNumber}}_ssa({{.Input}}); got != {{.Ans}} {
fmt.Printf("{{.Name}}_{{.Type_}} {{.Input}}{{.Symbol}}{{.Number}} = %d, wanted {{.Ans}}\n",got)
failed = true
}
`)

    type cfncData struct {
        Name, Type_, Symbol, FNumber, Number string
        Ans, Input                           string
    }
    for _, s := range szs {
        if len(s.u) > 0 {
            for _, o := range ops {
                fd := cfncData{o.name, s.name, o.symbol, "", "", "", ""}
                for _, i := range s.u {
                    fd.Number = fmt.Sprintf("%d", i)
                    fd.FNumber = strings.Replace(fd.Number, "-", "Neg", -1)

                    // unsigned
                    for _, j := range s.u {

                        if o.name != "div" || j != 0 {
                            fd.Ans = ansU(i, j, s.name, o.symbol)
                            fd.Input = fmt.Sprintf("%d", j)
                            err = vrf1.Execute(w, fd)
                            if err != nil {
                                panic(err)
                            }
                        }

                        if o.name != "div" || i != 0 {
                            fd.Ans = ansU(j, i, s.name, o.symbol)
                            fd.Input = fmt.Sprintf("%d", j)
                            err = vrf2.Execute(w, fd)
                            if err != nil {
                                panic(err)
                            }
                        }

                    }
                }
            }
        }

        // signed
        if len(s.i) > 0 {
            for _, o := range ops {
                // don't generate tests for shifts by signed integers
                if o.name == "lsh" || o.name == "rsh" {
                    continue
                }
                fd := cfncData{o.name, s.name, o.symbol, "", "", "", ""}
                for _, i := range s.i {
                    fd.Number = fmt.Sprintf("%d", i)
                    fd.FNumber = strings.Replace(fd.Number, "-", "Neg", -1)
                    for _, j := range s.i {

                        if o.name != "div" || j != 0 {
                            fd.Ans = ansS(i, j, s.name, o.symbol)
                            fd.Input = fmt.Sprintf("%d", j)
                            err = vrf1.Execute(w, fd)
                            if err != nil {
                                panic(err)
                            }
                        }

                        if o.name != "div" || i != 0 {
                            fd.Ans = ansS(j, i, s.name, o.symbol)
                            fd.Input = fmt.Sprintf("%d", j)
                            err = vrf2.Execute(w, fd)
                            if err != nil {
                                panic(err)
                            }
                        }

                    }
                }
            }
        }
    }

    fmt.Fprintf(w, `if failed {
panic("tests failed")
}
`)
    fmt.Fprintf(w, "}\n")

    // gofmt result
    b := w.Bytes()
    src, err := format.Source(b)
    if err != nil {
        fmt.Printf("%s\n", b)
        panic(err)
    }

    // write to file
    err = ioutil.WriteFile("../arithConst_ssa.go", src, 0666)
    if err != nil {
        log.Fatalf("can't write output: %v\n", err)
    }
}

View File

@ -0,0 +1,93 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"log"
)
// This program generates tests to verify that copying operations
// copy the data they are supposed to and clobber no adjacent values.
// run as `go run copyGen.go`. A file called copy_ssa.go
// will be written into the parent directory containing the tests.
var sizes = [...]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 23, 24, 25, 31, 32, 33, 63, 64, 65, 1023, 1024, 1025, 1024 + 7, 1024 + 8, 1024 + 9, 1024 + 15, 1024 + 16, 1024 + 17}
// main writes ../copy_ssa.go: for each size in sizes it emits a struct type
// whose mid array is flanked by 8-byte guard arrays, a t<N>copy_ssa function
// copying a whole [N]byte array, and a harness verifying the copy filled mid
// exactly and left both guards untouched.
func main() {
    w := new(bytes.Buffer)
    fmt.Fprintf(w, "// run\n")
    fmt.Fprintf(w, "// autogenerated from gen/copyGen.go - do not edit!\n")
    fmt.Fprintf(w, "package main\n")
    fmt.Fprintf(w, "import \"fmt\"\n")

    for _, s := range sizes {
        // type for test
        fmt.Fprintf(w, "type T%d struct {\n", s)
        fmt.Fprintf(w, "  pre [8]byte\n")
        fmt.Fprintf(w, "  mid [%d]byte\n", s)
        fmt.Fprintf(w, "  post [8]byte\n")
        fmt.Fprintf(w, "}\n")

        // function being tested
        fmt.Fprintf(w, "func t%dcopy_ssa(y, x *[%d]byte) {\n", s, s)
        fmt.Fprintf(w, "  switch{}\n")
        fmt.Fprintf(w, "  *y = *x\n")
        fmt.Fprintf(w, "}\n")

        // testing harness: guards hold 201..208 / 211..218, mid starts at
        // i%100 and is copied over with 100+i%100.
        fmt.Fprintf(w, "func testCopy%d() {\n", s)
        fmt.Fprintf(w, "  a := T%d{[8]byte{201, 202, 203, 204, 205, 206, 207, 208},[%d]byte{", s, s)
        for i := 0; i < s; i++ {
            fmt.Fprintf(w, "%d,", i%100)
        }
        fmt.Fprintf(w, "},[8]byte{211, 212, 213, 214, 215, 216, 217, 218}}\n")
        fmt.Fprintf(w, "  x := [%d]byte{", s)
        for i := 0; i < s; i++ {
            fmt.Fprintf(w, "%d,", 100+i%100)
        }
        fmt.Fprintf(w, "}\n")
        fmt.Fprintf(w, "  t%dcopy_ssa(&a.mid, &x)\n", s)
        fmt.Fprintf(w, "  want := T%d{[8]byte{201, 202, 203, 204, 205, 206, 207, 208},[%d]byte{", s, s)
        for i := 0; i < s; i++ {
            fmt.Fprintf(w, "%d,", 100+i%100)
        }
        fmt.Fprintf(w, "},[8]byte{211, 212, 213, 214, 215, 216, 217, 218}}\n")
        fmt.Fprintf(w, "  if a != want {\n")
        fmt.Fprintf(w, "    fmt.Printf(\"t%dcopy got=%%v, want %%v\\n\", a, want)\n", s)
        fmt.Fprintf(w, "    failed=true\n")
        fmt.Fprintf(w, "  }\n")
        fmt.Fprintf(w, "}\n")
    }

    // boilerplate at end
    fmt.Fprintf(w, "var failed bool\n")
    fmt.Fprintf(w, "func main() {\n")
    for _, s := range sizes {
        fmt.Fprintf(w, "  testCopy%d()\n", s)
    }
    fmt.Fprintf(w, "  if failed {\n")
    fmt.Fprintf(w, "    panic(\"failed\")\n")
    fmt.Fprintf(w, "  }\n")
    fmt.Fprintf(w, "}\n")

    // gofmt result
    b := w.Bytes()
    src, err := format.Source(b)
    if err != nil {
        fmt.Printf("%s\n", b)
        panic(err)
    }

    // write to file
    err = ioutil.WriteFile("../copy_ssa.go", src, 0666)
    if err != nil {
        log.Fatalf("can't write output: %v\n", err)
    }
}

View File

@ -0,0 +1,88 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"log"
)
// This program generates tests to verify that zeroing operations
// zero the data they are supposed to and clobber no adjacent values.
// run as `go run zeroGen.go`. A file called zero_ssa.go
// will be written into the parent directory containing the tests.
var sizes = [...]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 23, 24, 25, 31, 32, 33, 63, 64, 65, 1023, 1024, 1025}
// main writes ../zero_ssa.go: for each size in sizes it emits a struct type
// whose mid array is flanked by 8-byte guard arrays of 255s, a zero<N>_ssa
// function clearing a [N]byte array, and a harness verifying only mid was
// zeroed while both guards kept their 255s.
func main() {
    w := new(bytes.Buffer)
    fmt.Fprintf(w, "// run\n")
    fmt.Fprintf(w, "// autogenerated from gen/zeroGen.go - do not edit!\n")
    fmt.Fprintf(w, "package main\n")
    fmt.Fprintf(w, "import \"fmt\"\n")

    for _, s := range sizes {
        // type for test
        fmt.Fprintf(w, "type T%d struct {\n", s)
        fmt.Fprintf(w, "  pre [8]byte\n")
        fmt.Fprintf(w, "  mid [%d]byte\n", s)
        fmt.Fprintf(w, "  post [8]byte\n")
        fmt.Fprintf(w, "}\n")

        // function being tested
        fmt.Fprintf(w, "func zero%d_ssa(x *[%d]byte) {\n", s, s)
        fmt.Fprintf(w, "  switch{}\n")
        fmt.Fprintf(w, "  *x = [%d]byte{}\n", s)
        fmt.Fprintf(w, "}\n")

        // testing harness
        fmt.Fprintf(w, "func testZero%d() {\n", s)
        fmt.Fprintf(w, "  a := T%d{[8]byte{255,255,255,255,255,255,255,255},[%d]byte{", s, s)
        for i := 0; i < s; i++ {
            fmt.Fprintf(w, "255,")
        }
        fmt.Fprintf(w, "},[8]byte{255,255,255,255,255,255,255,255}}\n")
        fmt.Fprintf(w, "  zero%d_ssa(&a.mid)\n", s)
        fmt.Fprintf(w, "  want := T%d{[8]byte{255,255,255,255,255,255,255,255},[%d]byte{", s, s)
        for i := 0; i < s; i++ {
            fmt.Fprintf(w, "0,")
        }
        fmt.Fprintf(w, "},[8]byte{255,255,255,255,255,255,255,255}}\n")
        fmt.Fprintf(w, "  if a != want {\n")
        fmt.Fprintf(w, "    fmt.Printf(\"zero%d got=%%v, want %%v\\n\", a, want)\n", s)
        fmt.Fprintf(w, "    failed=true\n")
        fmt.Fprintf(w, "  }\n")
        fmt.Fprintf(w, "}\n")
    }

    // boilerplate at end
    fmt.Fprintf(w, "var failed bool\n")
    fmt.Fprintf(w, "func main() {\n")
    for _, s := range sizes {
        fmt.Fprintf(w, "  testZero%d()\n", s)
    }
    fmt.Fprintf(w, "  if failed {\n")
    fmt.Fprintf(w, "    panic(\"failed\")\n")
    fmt.Fprintf(w, "  }\n")
    fmt.Fprintf(w, "}\n")

    // gofmt result
    b := w.Bytes()
    src, err := format.Source(b)
    if err != nil {
        fmt.Printf("%s\n", b)
        panic(err)
    }

    // write to file
    err = ioutil.WriteFile("../zero_ssa.go", src, 0666)
    if err != nil {
        log.Fatalf("can't write output: %v\n", err)
    }
}

View File

@ -0,0 +1,117 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests load/store ordering
package main
import "fmt"
// testLoadStoreOrder tests for reordering of stores/loads. The helper must
// observe the store of 100 before its final load, so a return of 0 means
// the load was wrongly hoisted above the store.
func testLoadStoreOrder() {
    z := uint32(1000)
    if testLoadStoreOrder_ssa(&z, 100) == 0 {
        println("testLoadStoreOrder failed")
        failed = true
    }
}

// testLoadStoreOrder_ssa loads *z, stores a new value, then loads again;
// it returns 1 only if the second load sees the stored value being smaller
// than the first. (The empty switch appears to be the marker that routes
// this function through the SSA backend — confirm against the branch docs.)
func testLoadStoreOrder_ssa(z *uint32, prec uint) int {
    switch {
    }
    old := *z         // load
    *z = uint32(prec) // store
    if *z < old {     // load
        return 1
    }
    return 0
}
// testStoreSize checks that the 16-bit store performed by testStoreSize_ssa
// writes only a[0] and leaves the neighbouring elements untouched.
func testStoreSize() {
    a := [4]uint16{11, 22, 33, 44}
    testStoreSize_ssa(&a[0], &a[2], 77)
    want := [4]uint16{77, 22, 33, 44}
    if a != want {
        fmt.Println("testStoreSize failed. want =", want, ", got =", a)
        failed = true
    }
}

// testStoreSize_ssa truncates v to 16 bits and stores it through p or q.
func testStoreSize_ssa(p *uint16, q *uint16, v uint32) {
    switch {
    }
    // Test to make sure that (Store ptr (Trunc32to16 val) mem)
    // does not end up as a 32-bit store. It must stay a 16 bit store
    // even when Trunc32to16 is rewritten to be a nop.
    // To ensure that we rewrite the Trunc32to16 before
    // we rewrite the Store, we force the truncate into an
    // earlier basic block by using it on both branches.
    w := uint16(v)
    if p != nil {
        *p = w
    } else {
        *q = w
    }
}
// failed records whether any check in this file has failed; main panics at
// the end if it is set.
var failed = false

// testExtStore_ssa reads *p before overwriting it with 7 and, when b is
// true, returns the original byte widened to int — so the load must not be
// moved below the store.
func testExtStore_ssa(p *byte, b bool) int {
    switch {
    }
    x := *p
    *p = 7
    if b {
        return int(x)
    }
    return 0
}

// testExtStore verifies the pre-store value (8) is what gets returned.
func testExtStore() {
    const start = 8
    var b byte = start
    if got := testExtStore_ssa(&b, true); got != start {
        fmt.Println("testExtStore failed. want =", start, ", got =", got)
        failed = true
    }
}
// b is package-level state; it is not referenced by the functions visible
// here. NOTE(review): possibly a leftover — confirm before removing.
var b int

// testDeadStorePanic_ssa ensures that we don't optimize away stores
// that could still be read after recover(). Modeled after fixedbugs/issue1304.
// The division panics, the deferred recover runs, and the function must
// return the last value stored into a before the panic (2), not 3.
func testDeadStorePanic_ssa(a int) (r int) {
    switch {
    }
    defer func() {
        recover()
        r = a
    }()
    a = 2      // store
    b := a - a // optimized to zero
    c := 4
    a = c / b // store, but panics
    a = 3     // store
    r = a
    return
}

// testDeadStorePanic checks the recovered result equals 2.
func testDeadStorePanic() {
    if want, got := 2, testDeadStorePanic_ssa(1); want != got {
        fmt.Println("testDeadStorePanic failed. want =", want, ", got =", got)
        failed = true
    }
}
// main runs every load/store test in this file and panics if any recorded
// a failure.
func main() {
    testLoadStoreOrder()
    testStoreSize()
    testExtStore()
    testDeadStorePanic()
    if failed {
        panic("failed")
    }
}

View File

@ -0,0 +1,45 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// map_ssa.go tests map operations.
package main
import "fmt"
// failed records whether any check in this file has failed; main panics at
// the end if it is set.
var failed = false

// lenMap_ssa returns len(v). //go:noinline prevents inlining, presumably so
// the compiled call (rather than a folded constant) is what gets exercised.
//go:noinline
func lenMap_ssa(v map[int]int) int {
    return len(v)
}

// testLenMap checks len on a map with three entries.
func testLenMap() {
    v := make(map[int]int)
    v[0] = 0
    v[1] = 0
    v[2] = 0

    if want, got := 3, lenMap_ssa(v); got != want {
        fmt.Printf("expected len(map) = %d, got %d", want, got)
        failed = true
    }
}

// testLenNilMap checks that len of a nil map is 0.
func testLenNilMap() {
    var v map[int]int
    if want, got := 0, lenMap_ssa(v); got != want {
        fmt.Printf("expected len(nil) = %d, got %d", want, got)
        failed = true
    }
}

// main runs the map-length tests and panics if any failed.
func main() {
    testLenMap()
    testLenNilMap()
    if failed {
        panic("failed")
    }
}

View File

@ -0,0 +1,103 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
// Test to make sure spills of cast-shortened values
// don't end up spilling the pre-shortened size instead
// of the post-shortened size.
import (
"fmt"
"runtime"
)
// unfoldable true: true_ is a variable, not a constant, so the compiler
// cannot fold away the never-taken branch in foo.
var true_ = true

// data1 holds the int32 values read on the taken branch; data2 holds the
// int64 values (with a poisoned high half) read on the dead branch.
var data1 [26]int32
var data2 [26]int64

func init() {
    for i := 0; i < 26; i++ {
        // If we spill all 8 bytes of this datum, the 1 in the high-order 4 bytes
        // will overwrite some other variable in the stack frame.
        data2[i] = 0x100000000
    }
}
// foo sums 26 int32 variables loaded either from data1 (always taken) or
// truncated from data2 (never taken, but the compiler cannot prove it).
// The merge after the if/else creates many phi values whose spills must
// use the narrow width; the runtime.Gosched call forces spilling.
func foo() int32 {
    var a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z int32
    if true_ {
        a = data1[0]
        b = data1[1]
        c = data1[2]
        d = data1[3]
        e = data1[4]
        f = data1[5]
        g = data1[6]
        h = data1[7]
        i = data1[8]
        j = data1[9]
        k = data1[10]
        l = data1[11]
        m = data1[12]
        n = data1[13]
        o = data1[14]
        p = data1[15]
        q = data1[16]
        r = data1[17]
        s = data1[18]
        t = data1[19]
        u = data1[20]
        v = data1[21]
        w = data1[22]
        x = data1[23]
        y = data1[24]
        z = data1[25]
    } else {
        a = int32(data2[0])
        b = int32(data2[1])
        c = int32(data2[2])
        d = int32(data2[3])
        e = int32(data2[4])
        f = int32(data2[5])
        g = int32(data2[6])
        h = int32(data2[7])
        i = int32(data2[8])
        j = int32(data2[9])
        k = int32(data2[10])
        l = int32(data2[11])
        m = int32(data2[12])
        n = int32(data2[13])
        o = int32(data2[14])
        p = int32(data2[15])
        q = int32(data2[16])
        r = int32(data2[17])
        s = int32(data2[18])
        t = int32(data2[19])
        u = int32(data2[20])
        v = int32(data2[21])
        w = int32(data2[22])
        x = int32(data2[23])
        y = int32(data2[24])
        z = int32(data2[25])
    }
    // Lots of phis of the form phi(int32,int64) of type int32 happen here.
    // Some will be stack phis. For those stack phis, make sure the spill
    // of the second argument uses the phi's width (4 bytes), not the
    // argument's width (8 bytes). Otherwise, a random stack slot gets
    // clobbered.
    runtime.Gosched()
    return a + b + c + d + e + f + g + h + i + j + k + l + m + n + o + p + q + r + s + t + u + v + w + x + y + z
}
// main runs foo once and verifies the 26 phi'd int32s summed to zero, which
// fails if a spill clobbered a neighbouring stack slot.
func main() {
    const want = int32(0)
    if got := foo(); got != want {
        fmt.Printf("want %d, got %d\n", want, got)
        panic("bad")
    }
}

View File

@ -0,0 +1,57 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests phi implementation
package main
// phiOverwrite_ssa returns the last value assigned to n before the loop
// breaks at i == 6, i.e. 5. The name suggests it guards against the
// loop-carried phi for n being clobbered within the same iteration.
func phiOverwrite_ssa() int {
    var n int
    for i := 0; i < 10; i++ {
        if i == 6 {
            break
        }
        n = i
    }
    return n
}

// phiOverwrite checks phiOverwrite_ssa against the expected value 5.
func phiOverwrite() {
    want := 5
    got := phiOverwrite_ssa()
    if got != want {
        println("phiOverwrite_ssa()=", want, ", got", got)
        failed = true
    }
}
// phiOverwriteBig_ssa rotates 26 variables through each other 26 times,
// which returns them to their initial assignment (a == 1, the rest 0), so
// the weighted sum is exactly 1. The simultaneous assignment creates a
// large set of interdependent phis.
func phiOverwriteBig_ssa() int {
    var a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z int
    a = 1
    for idx := 0; idx < 26; idx++ {
        a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z = b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, a
    }
    return a*1 + b*2 + c*3 + d*4 + e*5 + f*6 + g*7 + h*8 + i*9 + j*10 + k*11 + l*12 + m*13 + n*14 + o*15 + p*16 + q*17 + r*18 + s*19 + t*20 + u*21 + v*22 + w*23 + x*24 + y*25 + z*26
}

// phiOverwriteBig checks phiOverwriteBig_ssa against the expected value 1.
func phiOverwriteBig() {
    want := 1
    got := phiOverwriteBig_ssa()
    if got != want {
        println("phiOverwriteBig_ssa()=", want, ", got", got)
        failed = true
    }
}
// failed records whether any check in this file has failed.
var failed = false

// main runs both phi tests and panics if either failed.
func main() {
    phiOverwrite()
    phiOverwriteBig()
    if failed {
        panic("failed")
    }
}

View File

@ -0,0 +1,60 @@
// run
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests short circuiting.
package main
// and_ssa evaluates arg1 && rightCall(arg2); short-circuiting means
// rightCall must run only when arg1 is true.
func and_ssa(arg1, arg2 bool) bool {
    return arg1 && rightCall(arg2)
}

// or_ssa evaluates arg1 || rightCall(arg2); short-circuiting means
// rightCall must run only when arg1 is false.
func or_ssa(arg1, arg2 bool) bool {
    return arg1 || rightCall(arg2)
}
// rightCalled records whether rightCall was invoked; the harness resets it
// before each check to observe short-circuit behavior.
var rightCalled bool

// rightCall notes that the right-hand operand was evaluated and returns it
// unchanged. //go:noinline keeps it a real call so the evaluation (or its
// absence) is observable.
//go:noinline
func rightCall(v bool) bool {
    rightCalled = true
    return v
    // The unreachable panic("unreached") that followed the return has been
    // removed: with //go:noinline in place it served no purpose and was
    // flagged by go vet as unreachable code.
}
// testAnd checks arg1 && arg2: rightCall must run iff arg1 is true.
func testAnd(arg1, arg2, wantRes bool) { testShortCircuit("AND", arg1, arg2, and_ssa, arg1, wantRes) }

// testOr checks arg1 || arg2: rightCall must run iff arg1 is false.
func testOr(arg1, arg2, wantRes bool) { testShortCircuit("OR", arg1, arg2, or_ssa, !arg1, wantRes) }

// testShortCircuit runs fn(arg1, arg2) and verifies both the boolean result
// and whether the right-hand side was evaluated (wantRightCall).
func testShortCircuit(opName string, arg1, arg2 bool, fn func(bool, bool) bool, wantRightCall, wantRes bool) {
    rightCalled = false
    got := fn(arg1, arg2)
    if rightCalled != wantRightCall {
        println("failed for", arg1, opName, arg2, "; rightCalled=", rightCalled, "want=", wantRightCall)
        failed = true
    }
    if wantRes != got {
        println("failed for", arg1, opName, arg2, "; res=", got, "want=", wantRes)
        failed = true
    }
}
// failed records whether any check in this file has failed.
var failed = false

// main checks all four input combinations for both && and ||.
func main() {
    testAnd(false, false, false)
    testAnd(false, true, false)
    testAnd(true, false, false)
    testAnd(true, true, true)

    testOr(false, false, false)
    testOr(false, true, true)
    testOr(true, false, true)
    testOr(true, true, true)

    if failed {
        panic("failed")
    }
}

View File

@ -0,0 +1,161 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// string_ssa.go tests string operations.
package main
// failed records whether any check in this file has failed; main panics at
// the end if it is set.
var failed = false

// testStringSlice1_ssa slices from i to the end; j is unused.
//go:noinline
func testStringSlice1_ssa(a string, i, j int) string {
    return a[i:]
}

// testStringSlice2_ssa slices from the start up to j; i is unused.
//go:noinline
func testStringSlice2_ssa(a string, i, j int) string {
    return a[:j]
}

// testStringSlice12_ssa slices from i up to j.
//go:noinline
func testStringSlice12_ssa(a string, i, j int) string {
    return a[i:j]
}
// testStringSlice runs a table of slice expressions through the three
// noinline helpers above and records any mismatch in failed.
func testStringSlice() {
    type sliceCase struct {
        fn        func(string, int, int) string
        s         string
        low, high int
        want      string
    }
    // -1 means the value is not used.
    cases := []sliceCase{
        {testStringSlice1_ssa, "foobar", 0, -1, "foobar"},
        {testStringSlice1_ssa, "foobar", 3, -1, "bar"},
        {testStringSlice1_ssa, "foobar", 6, -1, ""},
        {testStringSlice2_ssa, "foobar", -1, 0, ""},
        {testStringSlice2_ssa, "foobar", -1, 3, "foo"},
        {testStringSlice2_ssa, "foobar", -1, 6, "foobar"},
        {testStringSlice12_ssa, "foobar", 0, 6, "foobar"},
        {testStringSlice12_ssa, "foobar", 0, 0, ""},
        {testStringSlice12_ssa, "foobar", 6, 6, ""},
        {testStringSlice12_ssa, "foobar", 1, 5, "ooba"},
        {testStringSlice12_ssa, "foobar", 3, 3, ""},
        {testStringSlice12_ssa, "", 0, 0, ""},
    }
    for idx, tc := range cases {
        got := tc.fn(tc.s, tc.low, tc.high)
        if tc.want != got {
            println("#", idx, " ", tc.s, "[", tc.low, ":", tc.high, "] = ", got, " want ", tc.want)
            failed = true
        }
    }
}
// prefix wraps a string field so that slicing a string stored in a struct,
// through a pointer receiver, gets exercised.
type prefix struct {
    prefix string
}

// slice_ssa truncates p.prefix in place to its first 3 bytes.
func (p *prefix) slice_ssa() {
    p.prefix = p.prefix[:3]
}
// testStructSlice checks that slicing a string held in a struct field
// through slice_ssa leaves "pre".
func testStructSlice() {
    switch {
    }
    p := &prefix{"prefix"}
    p.slice_ssa()
    if "pre" != p.prefix {
        // println performs no verb formatting, so the old message printed
        // the literal text "%s". Pass labeled values instead.
        println("wrong field slice: wanted", "pre", "got", p.prefix)
        failed = true
    }
}
// testStringSlicePanic slices "foobar" (len 6) with high bound 9 and
// expects a bounds-check panic, which the deferred function recovers.
// Reaching the second println means the panic never happened — a missed
// bounds check — so failed is set.
func testStringSlicePanic() {
    defer func() {
        if r := recover(); r != nil {
            // Fixed typo in the status message: "paniced" -> "panicked".
            println("panicked as expected")
        }
    }()

    str := "foobar"
    println("got ", testStringSlice12_ssa(str, 3, 9))
    println("expected to panic, but didn't")
    failed = true
}
// _Accuracy_name/_Accuracy_index mimic a stringer table: entry i is
// _Accuracy_name[_Accuracy_index[i]:_Accuracy_index[i+1]].
const _Accuracy_name = "BelowExactAbove"

var _Accuracy_index = [...]uint8{0, 5, 10, 15}

// testSmallIndexType_ssa slices a string using uint8 bounds, i.e. index
// types narrower than int.
//go:noinline
func testSmallIndexType_ssa(i int) string {
    return _Accuracy_name[_Accuracy_index[i]:_Accuracy_index[i+1]]
}

// testSmallIndexType checks all three table entries.
func testSmallIndexType() {
    tests := []struct {
        i    int
        want string
    }{
        {0, "Below"},
        {1, "Exact"},
        {2, "Above"},
    }

    for i, t := range tests {
        if got := testSmallIndexType_ssa(t.i); got != t.want {
            println("#", i, "got ", got, ", wanted", t.want)
            failed = true
        }
    }
}
// testStringElem_ssa returns the byte s[i]; kept out of line so the index
// expression is compiled in its own function.
//go:noinline
func testStringElem_ssa(s string, i int) byte {
    return s[i]
}

// testStringElem checks byte indexing into "foobar" at three positions.
func testStringElem() {
    tests := []struct {
        s string
        i int
        n byte
    }{
        {"foobar", 3, 98},
        {"foobar", 0, 102},
        {"foobar", 5, 114},
    }
    for _, t := range tests {
        if got := testStringElem_ssa(t.s, t.i); got != t.n {
            print("testStringElem \"", t.s, "\"[", t.i, "]=", got, ", wanted ", t.n, "\n")
            failed = true
        }
    }
}
// testStringElemConst_ssa indexes a constant string with a variable index.
//go:noinline
func testStringElemConst_ssa(i int) byte {
    s := "foobar"
    return s[i]
}

// testStringElemConst checks that "foobar"[3] is 'b' (98).
func testStringElemConst() {
    if got := testStringElemConst_ssa(3); got != 98 {
        println("testStringElemConst=", got, ", wanted 98")
        failed = true
    }
}
// main runs every string test in this file and panics if any of them
// recorded a failure.
func main() {
    testStringSlice()
    testStringSlicePanic()
    testStructSlice()
    testSmallIndexType()
    testStringElem()
    testStringElemConst()
    if failed {
        panic("failed")
    }
}

View File

@ -0,0 +1,148 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
"runtime"
"unsafe"
)
// global pointer slot
var a *[8]uint

// unfoldable true: a variable so the compiler cannot fold the branches.
var b = true

// Test to make sure that a pointer value which is alive
// across a call is retained, even when there are matching
// conversions to/from uintptr around the call.
// We arrange things very carefully to have to/from
// conversions on either side of the call which cannot be
// combined with any other conversions.
func f_ssa() *[8]uint {
    // Make x a uintptr pointing to where a points.
    var x uintptr
    if b {
        x = uintptr(unsafe.Pointer(a))
    } else {
        x = 0
    }
    // Clobber the global pointer. The only live ref
    // to the allocated object is now x.
    a = nil

    // Convert to pointer so it should hold
    // the object live across GC call.
    p := unsafe.Pointer(x)

    // Call gc.
    runtime.GC()

    // Convert back to uintptr.
    y := uintptr(p)

    // Mess with y so that the subsequent cast
    // to unsafe.Pointer can't be combined with the
    // uintptr cast above.
    var z uintptr
    if b {
        z = y
    } else {
        z = 0
    }
    return (*[8]uint)(unsafe.Pointer(z))
}
// g_ssa is the same as f_ssa, but with a bit of pointer
// arithmetic for added insanity: the uintptr is advanced by one int before
// the GC, so the returned pointer addresses only 7 of the 8 elements.
func g_ssa() *[7]uint {
    // Make x a uintptr pointing to where a points.
    var x uintptr
    if b {
        x = uintptr(unsafe.Pointer(a))
    } else {
        x = 0
    }
    // Clobber the global pointer. The only live ref
    // to the allocated object is now x.
    a = nil

    // Offset x by one int.
    x += unsafe.Sizeof(int(0))

    // Convert to pointer so it should hold
    // the object live across GC call.
    p := unsafe.Pointer(x)

    // Call gc.
    runtime.GC()

    // Convert back to uintptr.
    y := uintptr(p)

    // Mess with y so that the subsequent cast
    // to unsafe.Pointer can't be combined with the
    // uintptr cast above.
    var z uintptr
    if b {
        z = y
    } else {
        z = 0
    }
    return (*[7]uint)(unsafe.Pointer(z))
}
// testf fills *a with the sentinel 0xabcd, runs f_ssa (which GCs while the
// only reference is disguised as a uintptr), and checks every element of
// the returned object survived.
func testf() {
    a = new([8]uint)
    for i := 0; i < 8; i++ {
        a[i] = 0xabcd
    }
    c := f_ssa()
    for i := 0; i < 8; i++ {
        if c[i] != 0xabcd {
            fmt.Printf("%d:%x\n", i, c[i])
            panic("bad c")
        }
    }
}

// testg is testf for g_ssa; its result points one uint past the base, so
// only 7 elements remain to be checked.
func testg() {
    a = new([8]uint)
    for i := 0; i < 8; i++ {
        a[i] = 0xabcd
    }
    c := g_ssa()
    for i := 0; i < 7; i++ {
        if c[i] != 0xabcd {
            fmt.Printf("%d:%x\n", i, c[i])
            panic("bad c")
        }
    }
}
// alias_ssa stores through two pointers that (as built by testdse) alias
// the same memory. The 64-bit zero store must not be eliminated as dead,
// and the following 32-bit load must observe it (returning 0), even though
// a later store overwrites the word again.
func alias_ssa(ui64 *uint64, ui32 *uint32) uint32 {
    *ui32 = 0xffffffff
    *ui64 = 0                  // store
    ret := *ui32               // load from same address, should be zero
    *ui64 = 0xffffffffffffffff // store
    return ret
}
// testdse builds two differently-typed pointers to the same stack word via
// unsafe.Pointer and verifies through alias_ssa that dead-store
// elimination respects the aliasing.
func testdse() {
    x := int64(-1)
    // construct two pointers that alias one another
    ui64 := (*uint64)(unsafe.Pointer(&x))
    ui32 := (*uint32)(unsafe.Pointer(&x))

    if want, got := uint32(0), alias_ssa(ui64, ui32); got != want {
        fmt.Printf("alias_ssa: wanted %d, got %d\n", want, got)
        panic("alias_ssa")
    }
}
// main runs the pointer-liveness checks (testf, testg) and the aliased
// dead-store check (testdse); each panics on failure.
func main() {
    for _, check := range []func(){testf, testg, testdse} {
        check()
    }
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,388 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file provides methods that let us export a Type as an ../ssa:Type.
// We don't export this package's Type directly because it would lead
// to an import cycle with this package and ../ssa.
// TODO: move Type to its own package, then we don't need to dance around import cycles.
package gc
import (
"cmd/compile/internal/ssa"
"fmt"
)
// Size returns the type's size in bytes, forcing width computation first.
func (t *Type) Size() int64 {
    dowidth(t)
    return t.Width
}

// Alignment returns the required alignment of t in bytes, forcing width
// computation first.
func (t *Type) Alignment() int64 {
    dowidth(t)
    return int64(t.Align)
}

// SimpleString returns a short name for t's kind (its Etype).
func (t *Type) SimpleString() string {
    return Econv(t.Etype)
}

// Equal reports whether u is a *gc.Type equivalent to t under Eqtype.
// Any ssa.Type that is not a *gc.Type compares unequal.
func (t *Type) Equal(u ssa.Type) bool {
    x, ok := u.(*Type)
    if !ok {
        return false
    }
    return Eqtype(t, x)
}
// Compare compares types for purposes of the SSA back
// end, returning an ssa.Cmp (one of CMPlt, CMPeq, CMPgt).
// The answers are correct for an optimizer
// or code generator, but not for Go source.
// For example, "type gcDrainFlags int" results in
// two Go-different types that Compare equal.
// The order chosen is also arbitrary, only division into
// equivalence classes (Types that compare CMPeq) matters.
func (t *Type) Compare(u ssa.Type) ssa.Cmp {
    x, ok := u.(*Type)
    // ssa.CompilerType is smaller than gc.Type
    // bare pointer equality is easy.
    if !ok {
        // Every *gc.Type compares greater than any non-*gc.Type.
        return ssa.CMPgt
    }
    if x == t {
        return ssa.CMPeq
    }
    return t.cmp(x)
}
// cmpForNe maps a "<" test onto an ssa.Cmp: CMPlt when x is true, CMPgt
// otherwise. Callers invoke it only after establishing the two operands
// are unequal, so CMPeq is never an answer.
func cmpForNe(x bool) ssa.Cmp {
    if !x {
        return ssa.CMPgt
    }
    return ssa.CMPlt
}
// cmpsym orders symbols: identical pointers are equal, nil sorts before
// non-nil; otherwise symbols compare by name length, then package-prefix
// length, then package prefix, then name. The order is arbitrary but
// consistent, chosen for speed rather than readability of sorted output.
func (r *Sym) cmpsym(s *Sym) ssa.Cmp {
    if r == s {
        return ssa.CMPeq
    }
    if r == nil {
        return ssa.CMPlt
    }
    if s == nil {
        return ssa.CMPgt
    }
    // Fast sort, not pretty sort
    if len(r.Name) != len(s.Name) {
        return cmpForNe(len(r.Name) < len(s.Name))
    }
    if r.Pkg != s.Pkg {
        if len(r.Pkg.Prefix) != len(s.Pkg.Prefix) {
            return cmpForNe(len(r.Pkg.Prefix) < len(s.Pkg.Prefix))
        }
        if r.Pkg.Prefix != s.Pkg.Prefix {
            return cmpForNe(r.Pkg.Prefix < s.Pkg.Prefix)
        }
    }
    if r.Name != s.Name {
        return cmpForNe(r.Name < s.Name)
    }
    return ssa.CMPeq
}
// cmp compares two *Types t and x, returning ssa.CMPlt,
// ssa.CMPeq, ssa.CMPgt as t<x, t==x, t>x, for an arbitrary
// and optimizer-centric notion of comparison.
func (t *Type) cmp(x *Type) ssa.Cmp {
    // This follows the structure of Eqtype in subr.go
    // with two exceptions.
    // 1. Symbols are compared more carefully because a <,=,> result is desired.
    // 2. Maps are treated specially to avoid endless recursion -- maps
    //    contain an internal data type not expressible in Go source code.
    if t == x {
        return ssa.CMPeq
    }
    if t == nil {
        return ssa.CMPlt
    }
    if x == nil {
        return ssa.CMPgt
    }

    if t.Etype != x.Etype {
        return cmpForNe(t.Etype < x.Etype)
    }

    if t.Sym != nil || x.Sym != nil {
        // Special case: we keep byte and uint8 separate
        // for error messages. Treat them as equal.
        switch t.Etype {
        case TUINT8:
            if (t == Types[TUINT8] || t == bytetype) && (x == Types[TUINT8] || x == bytetype) {
                return ssa.CMPeq
            }

        case TINT32:
            // Same for rune and int32.
            if (t == Types[runetype.Etype] || t == runetype) && (x == Types[runetype.Etype] || x == runetype) {
                return ssa.CMPeq
            }
        }
    }

    csym := t.Sym.cmpsym(x.Sym)
    if csym != ssa.CMPeq {
        return csym
    }

    if x.Sym != nil {
        // Syms non-nil, if vargens match then equal.
        if t.Vargen == x.Vargen {
            return ssa.CMPeq
        }
        if t.Vargen < x.Vargen {
            return ssa.CMPlt
        }
        return ssa.CMPgt
    }
    // both syms nil, look at structure below.

    switch t.Etype {
    case TBOOL, TFLOAT32, TFLOAT64, TCOMPLEX64, TCOMPLEX128, TUNSAFEPTR, TUINTPTR,
        TINT8, TINT16, TINT32, TINT64, TINT, TUINT8, TUINT16, TUINT32, TUINT64, TUINT:
        // Basic types are fully determined by their Etype.
        return ssa.CMPeq
    }

    switch t.Etype {
    case TMAP, TFIELD:
        // No special cases for these two, they are handled
        // by the general code after the switch.

    case TPTR32, TPTR64:
        // Pointers compare by element type only.
        return t.Type.cmp(x.Type)

    case TSTRUCT:
        if t.Map == nil {
            if x.Map != nil {
                return ssa.CMPlt // nil < non-nil
            }
            // to the fallthrough
        } else if x.Map == nil {
            return ssa.CMPgt // nil > non-nil
        } else if t.Map.Bucket == t {
            // Both have non-nil Map
            // Special case for Maps which include a recursive type where the recursion is not broken with a named type
            if x.Map.Bucket != x {
                return ssa.CMPlt // bucket maps are least
            }
            return t.Map.cmp(x.Map)
        } // If t != t.Map.Bucket, fall through to general case

        fallthrough
    case TINTER:
        // Walk the field/method lists in parallel.
        t1 := t.Type
        x1 := x.Type
        for ; t1 != nil && x1 != nil; t1, x1 = t1.Down, x1.Down {
            if t1.Embedded != x1.Embedded {
                if t1.Embedded < x1.Embedded {
                    return ssa.CMPlt
                }
                return ssa.CMPgt
            }
            if t1.Note != x1.Note {
                // Note is a *string; compare nil-ness first, then contents.
                if t1.Note == nil {
                    return ssa.CMPlt
                }
                if x1.Note == nil {
                    return ssa.CMPgt
                }
                if *t1.Note != *x1.Note {
                    if *t1.Note < *x1.Note {
                        return ssa.CMPlt
                    }
                    return ssa.CMPgt
                }
            }
            c := t1.Sym.cmpsym(x1.Sym)
            if c != ssa.CMPeq {
                return c
            }
            c = t1.Type.cmp(x1.Type)
            if c != ssa.CMPeq {
                return c
            }
        }
        // The shorter field list sorts first.
        if t1 == x1 {
            return ssa.CMPeq
        }
        if t1 == nil {
            return ssa.CMPlt
        }
        return ssa.CMPgt

    case TFUNC:
        t1 := t.Type
        t2 := x.Type
        for ; t1 != nil && t2 != nil; t1, t2 = t1.Down, t2.Down {
            // Loop over fields in structs, ignoring argument names.
            ta := t1.Type
            tb := t2.Type
            for ; ta != nil && tb != nil; ta, tb = ta.Down, tb.Down {
                if ta.Isddd != tb.Isddd {
                    if ta.Isddd {
                        return ssa.CMPgt
                    }
                    return ssa.CMPlt
                }
                c := ta.Type.cmp(tb.Type)
                if c != ssa.CMPeq {
                    return c
                }
            }

            if ta != tb {
                // NOTE(review): this tests t1, not ta — when the inner
                // field lists differ in length this looks like it should
                // examine ta; confirm against upstream before relying on
                // the ordering here.
                if t1 == nil {
                    return ssa.CMPlt
                }
                return ssa.CMPgt
            }
        }
        if t1 != t2 {
            if t1 == nil {
                return ssa.CMPlt
            }
            return ssa.CMPgt
        }
        return ssa.CMPeq

    case TARRAY:
        if t.Bound != x.Bound {
            return cmpForNe(t.Bound < x.Bound)
        }

    case TCHAN:
        if t.Chan != x.Chan {
            return cmpForNe(t.Chan < x.Chan)
        }

    default:
        e := fmt.Sprintf("Do not know how to compare %s with %s", t, x)
        panic(e)
    }

    // Common element comparison for the kinds that fell through
    // (TMAP, TFIELD, TARRAY, TCHAN, and struct/interface tails).
    c := t.Down.cmp(x.Down)
    if c != ssa.CMPeq {
        return c
    }
    return t.Type.cmp(x.Type)
}
// IsBoolean reports whether t is a boolean type.
func (t *Type) IsBoolean() bool {
    return t.Etype == TBOOL
}

// IsInteger reports whether t is an integer type of any size or
// signedness, including uintptr.
func (t *Type) IsInteger() bool {
    switch t.Etype {
    case TINT8, TUINT8, TINT16, TUINT16, TINT32, TUINT32, TINT64, TUINT64, TINT, TUINT, TUINTPTR:
        return true
    }
    return false
}

// IsSigned reports whether t is a signed integer type.
func (t *Type) IsSigned() bool {
    switch t.Etype {
    case TINT8, TINT16, TINT32, TINT64, TINT:
        return true
    }
    return false
}

// IsFloat reports whether t is float32 or float64.
func (t *Type) IsFloat() bool {
    return t.Etype == TFLOAT32 || t.Etype == TFLOAT64
}

// IsComplex reports whether t is complex64 or complex128.
func (t *Type) IsComplex() bool {
    return t.Etype == TCOMPLEX64 || t.Etype == TCOMPLEX128
}

// IsPtr reports whether t is pointer-like for the backend's purposes:
// real pointers and unsafe.Pointer, and also maps, channels, and funcs.
func (t *Type) IsPtr() bool {
    return t.Etype == TPTR32 || t.Etype == TPTR64 || t.Etype == TUNSAFEPTR ||
        t.Etype == TMAP || t.Etype == TCHAN || t.Etype == TFUNC
}

// IsString reports whether t is a string type.
func (t *Type) IsString() bool {
    return t.Etype == TSTRING
}

// IsMap reports whether t is a map type.
func (t *Type) IsMap() bool {
    return t.Etype == TMAP
}

// IsChan reports whether t is a channel type.
func (t *Type) IsChan() bool {
    return t.Etype == TCHAN
}

// IsSlice reports whether t is a slice: a TARRAY with a negative Bound.
func (t *Type) IsSlice() bool {
    return t.Etype == TARRAY && t.Bound < 0
}

// IsArray reports whether t is a fixed-size array: a TARRAY with a
// non-negative Bound.
func (t *Type) IsArray() bool {
    return t.Etype == TARRAY && t.Bound >= 0
}

// IsStruct reports whether t is a struct type.
func (t *Type) IsStruct() bool {
    return t.Etype == TSTRUCT
}

// IsInterface reports whether t is an interface type.
func (t *Type) IsInterface() bool {
    return t.Etype == TINTER
}

// Elem returns t's element type (t.Type) as an ssa.Type.
func (t *Type) Elem() ssa.Type {
    return t.Type
}

// PtrTo returns the type *t as an ssa.Type.
func (t *Type) PtrTo() ssa.Type {
    return Ptrto(t)
}

// NumFields returns the number of fields in a struct type.
func (t *Type) NumFields() int64 {
    return int64(countfield(t))
}

// FieldType returns the type of the i'th field, walking the TFIELD list.
func (t *Type) FieldType(i int64) ssa.Type {
    // TODO: store fields in a slice so we can
    // look them up by index in constant time.
    for t1 := t.Type; t1 != nil; t1 = t1.Down {
        if t1.Etype != TFIELD {
            panic("non-TFIELD in a TSTRUCT")
        }
        if i == 0 {
            return t1.Type
        }
        i--
    }
    panic("not enough fields")
}

// FieldOff returns the i'th field's Width, which — given this method's
// name and use — records the field's offset within the struct.
func (t *Type) FieldOff(i int64) int64 {
    for t1 := t.Type; t1 != nil; t1 = t1.Down {
        if t1.Etype != TFIELD {
            panic("non-TFIELD in a TSTRUCT")
        }
        if i == 0 {
            return t1.Width
        }
        i--
    }
    panic("not enough fields")
}

// NumElem returns the length of an array type; it panics on non-arrays.
func (t *Type) NumElem() int64 {
    if t.Etype != TARRAY {
        panic("NumElem on non-TARRAY")
    }
    return int64(t.Bound)
}

// A gc.Type is never one of the ssa backend's pseudo-types.
func (t *Type) IsMemory() bool { return false }
func (t *Type) IsFlags() bool  { return false }
func (t *Type) IsVoid() bool   { return false }

View File

@ -2566,7 +2566,7 @@ func paramstoheap(argin **Type, out int) []*Node {
// Defer might stop a panic and show the
// return values as they exist at the time of panic.
// Make sure to zero them on entry to the function.
nn = append(nn, Nod(OAS, nodarg(t, 1), nil))
nn = append(nn, Nod(OAS, nodarg(t, -1), nil))
}
if v == nil || v.Class&PHEAP == 0 {

View File

@ -0,0 +1,68 @@
This is a to-do list of work remaining on the SSA backend. It will
hopefully be complete soon.
Coverage
--------
Correctness
-----------
- Debugging info (check & fix as much as we can)
Optimizations (better compiled code)
------------------------------------
- Reduce register pressure in scheduler
- More strength reduction: multiply -> shift/add combos (Worth doing?)
- Add a value range propagation pass (for bounds elim & bitwidth reduction)
- Make dead store pass inter-block
- redundant CMP in sequences like this:
SUBQ $8, AX
CMP AX, $0
JEQ ...
- If there are a lot of MOVQ $0, ..., then load
0 into a register and use the register as the source instead.
- Allow arrays of length 1 (or longer, with all constant indexes?) to be SSAable.
- Figure out how to make PARAMOUT variables ssa-able.
They need to get spilled automatically at end-of-function somehow.
- If strings are being passed around without being interpreted (ptr
and len fields being accessed) pass them in xmm registers?
Same for interfaces?
- OpArrayIndex should take its index in AuxInt, not a full value.
- remove FLAGS from REP instruction clobbers
- (x86) Combine loads into other ops
Note that this is challenging for ops that generate flags
because flagalloc wants to move those instructions around for
flag regeneration.
- Non-constant rotate detection.
- Do 0 <= x && x < n with one unsigned compare
- nil-check removal in indexed load/store case:
lea (%rdx,%rax,1),%rcx
test %al,(%rcx) // nil check
mov (%rdx,%rax,1),%cl // load to same address
- any pointer generated by unsafe arithmetic must be non-nil?
(Of course that may not be true in general, but it is for all uses
in the runtime, and we can play games with unsafe.)
Optimizations (better compiler)
-------------------------------
- Smaller Value.Type (int32 or ptr)? Get rid of types altogether?
- OpStore uses 3 args. Increase the size of Value.argstorage to 3?
- Use a constant cache for OpConstNil, OpConstInterface, OpConstSlice, maybe OpConstString
- Handle signed division overflow and sign extension earlier
- Implement 64 bit const division with high multiply, maybe in the frontend?
- Add bit widths to complex ops
Regalloc
--------
- Make less arch-dependent
- Allow return values to be ssa-able
- Handle 2-address instructions
- Make liveness analysis non-quadratic
Future/other
------------
- Start another architecture (arm?)
- 64-bit ops on 32-bit machines
- Investigate type equality. During SSA generation, should we use n.Type or (say) TypeBool?
- Should we get rid of named types in favor of underlying types during SSA generation?
- Should we introduce a new type equality routine that is less strict than the frontend's?
- Infrastructure for enabling/disabling/configuring passes

View File

@ -0,0 +1,118 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "fmt"
// Block represents a basic block in the control flow graph of a function.
type Block struct {
	// A unique identifier for the block. The system will attempt to allocate
	// these IDs densely, but no guarantees.
	ID ID

	// The kind of block this is.
	Kind BlockKind

	// Subsequent blocks, if any. The number and order depend on the block kind.
	// All successors must be distinct (to make phi values in successors unambiguous).
	Succs []*Block

	// Inverse of successors.
	// The order is significant to Phi nodes in the block.
	Preds []*Block
	// TODO: predecessors is a pain to maintain. Can we somehow order phi
	// arguments by block id and have this field computed explicitly when needed?

	// A value that determines how the block is exited. Its value depends on the kind
	// of the block. For instance, a BlockIf has a boolean control value and BlockExit
	// has a memory control value.
	Control *Value

	// Auxiliary info for the block. Its value depends on the Kind.
	Aux interface{}

	// The unordered set of Values that define the operation of this block.
	// The list must include the control value, if any. (TODO: need this last condition?)
	// After the scheduling pass, this list is ordered.
	Values []*Value

	// The containing function
	Func *Func

	// Line number for block's control operation
	Line int32

	// Likely direction for branches.
	// If BranchLikely, Succs[0] is the most likely branch taken.
	// If BranchUnlikely, Succs[1] is the most likely branch taken.
	// Ignored if len(Succs) < 2.
	// Fatal if not BranchUnknown and len(Succs) > 2.
	Likely BranchPrediction

	// After flagalloc, records whether flags are live at the end of the block.
	FlagsLiveAtEnd bool

	// Storage for Succs, Preds, and Values, inlined into the Block to avoid
	// separate small allocations for typical blocks.
	// NOTE: succstorage[0] also serves as the "next" link when a freed block
	// sits on the function's freeBlocks list (see checkFunc's free-list walk).
	succstorage [2]*Block
	predstorage [4]*Block
	valstorage  [8]*Value
}
// BlockKind identifies the flavor of a block, which determines how many
// successors it has and what type its control value (if any) must be:
//
//     kind           control    successors
//   ------------------------------------------
//     Exit        return mem                []
//    Plain               nil            [next]
//       If   a boolean Value      [then, else]
//     Call               mem  [nopanic, panic]  (control opcode should be OpCall or OpStaticCall)
type BlockKind int32
// String returns a short identifier for the block, e.g. "b7".
func (b *Block) String() string {
	return "b" + fmt.Sprint(b.ID)
}
// LongString returns a detailed description of the block: its kind,
// optional aux and control value, successor list, and branch prediction.
func (b *Block) LongString() string {
	str := b.Kind.String()
	if b.Aux != nil {
		str += fmt.Sprintf(" %s", b.Aux)
	}
	if b.Control != nil {
		str += fmt.Sprintf(" %s", b.Control)
	}
	if len(b.Succs) > 0 {
		str += " ->"
		for _, succ := range b.Succs {
			str += " " + succ.String()
		}
	}
	if b.Likely == BranchUnlikely {
		str += " (unlikely)"
	} else if b.Likely == BranchLikely {
		str += " (likely)"
	}
	return str
}
// AddEdgeTo adds an edge from block b to block c. Used during building of the
// SSA graph; do not use on an already-completed SSA graph.
func (b *Block) AddEdgeTo(c *Block) {
	// Keep the successor and predecessor lists mutually consistent.
	b.Succs = append(b.Succs, c)
	c.Preds = append(c.Preds, b)
}
// The following methods forward logging and error reporting to the
// block's containing function.
func (b *Block) Logf(msg string, args ...interface{})   { b.Func.Logf(msg, args...) }
func (b *Block) Log() bool                              { return b.Func.Log() }
func (b *Block) Fatalf(msg string, args ...interface{}) { b.Func.Fatalf(msg, args...) }
func (b *Block) Unimplementedf(msg string, args ...interface{}) { b.Func.Unimplementedf(msg, args...) }
// BranchPrediction records the likely direction of a conditional branch.
// See the doc comment on Block.Likely for how it is interpreted.
type BranchPrediction int8

const (
	BranchUnlikely = BranchPrediction(-1)
	BranchUnknown  = BranchPrediction(0)
	BranchLikely   = BranchPrediction(+1)
)

View File

@ -0,0 +1,291 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// checkFunc checks invariants of f and reports any violation via f.Fatalf:
//   - every block/value appears exactly once and belongs to f
//   - each block kind has the right number of successors and the right
//     kind of control value
//   - aux/auxint usage matches the opcode table, arg counts are sane
//   - the free lists contain no live blocks/values
//   - (before regalloc only) all args and controls dominate their uses
func checkFunc(f *Func) {
	blockMark := make([]bool, f.NumBlocks())
	valueMark := make([]bool, f.NumValues())

	for _, b := range f.Blocks {
		if blockMark[b.ID] {
			f.Fatalf("block %s appears twice in %s!", b, f.Name)
		}
		blockMark[b.ID] = true
		if b.Func != f {
			f.Fatalf("%s.Func=%s, want %s", b, b.Func.Name, f.Name)
		}

		if f.RegAlloc == nil {
			// Before regalloc, successors must be distinct so that phi
			// arguments in successors are unambiguous.
			for i, c := range b.Succs {
				for j, d := range b.Succs {
					if i != j && c == d {
						f.Fatalf("%s.Succs has duplicate block %s", b, c)
					}
				}
			}
		}
		// Note: duplicate successors are hard in the following case:
		//      if(...) goto x else goto x
		//   x: v = phi(a, b)
		// If the conditional is true, does v get the value of a or b?
		// We could solve this other ways, but the easiest is just to
		// require (by possibly adding empty control-flow blocks) that
		// all successors are distinct. They will need to be distinct
		// anyway for register allocation (duplicate successors implies
		// the existence of critical edges).
		// After regalloc we can allow non-distinct predecessors.

		for _, p := range b.Preds {
			var found bool
			for _, c := range p.Succs {
				if c == b {
					found = true
					break
				}
			}
			if !found {
				f.Fatalf("block %s is not a succ of its pred block %s", b, p)
			}
		}

		switch b.Kind {
		case BlockExit:
			if len(b.Succs) != 0 {
				f.Fatalf("exit block %s has successors", b)
			}
			if b.Control == nil {
				f.Fatalf("exit block %s has no control value", b)
			}
			if !b.Control.Type.IsMemory() {
				f.Fatalf("exit block %s has non-memory control value %s", b, b.Control.LongString())
			}
		case BlockRet:
			if len(b.Succs) != 0 {
				f.Fatalf("ret block %s has successors", b)
			}
			if b.Control == nil {
				// Fixed: format string had a trailing %s with no argument.
				f.Fatalf("ret block %s has nil control", b)
			}
			if !b.Control.Type.IsMemory() {
				f.Fatalf("ret block %s has non-memory control value %s", b, b.Control.LongString())
			}
		case BlockRetJmp:
			if len(b.Succs) != 0 {
				f.Fatalf("retjmp block %s len(Succs)==%d, want 0", b, len(b.Succs))
			}
			if b.Control == nil {
				// Fixed: format string had a trailing %s with no argument.
				f.Fatalf("retjmp block %s has nil control", b)
			}
			if !b.Control.Type.IsMemory() {
				f.Fatalf("retjmp block %s has non-memory control value %s", b, b.Control.LongString())
			}
			if b.Aux == nil {
				f.Fatalf("retjmp block %s has nil Aux field", b)
			}
		case BlockDead:
			if len(b.Succs) != 0 {
				f.Fatalf("dead block %s has successors", b)
			}
			if len(b.Preds) != 0 {
				f.Fatalf("dead block %s has predecessors", b)
			}
			if len(b.Values) != 0 {
				f.Fatalf("dead block %s has values", b)
			}
			if b.Control != nil {
				f.Fatalf("dead block %s has a control value", b)
			}
		case BlockPlain:
			if len(b.Succs) != 1 {
				f.Fatalf("plain block %s len(Succs)==%d, want 1", b, len(b.Succs))
			}
			if b.Control != nil {
				f.Fatalf("plain block %s has non-nil control %s", b, b.Control.LongString())
			}
		case BlockIf:
			if len(b.Succs) != 2 {
				f.Fatalf("if block %s len(Succs)==%d, want 2", b, len(b.Succs))
			}
			if b.Control == nil {
				f.Fatalf("if block %s has no control value", b)
			}
			if !b.Control.Type.IsBoolean() {
				f.Fatalf("if block %s has non-bool control value %s", b, b.Control.LongString())
			}
		case BlockCall:
			if len(b.Succs) != 1 {
				f.Fatalf("call block %s len(Succs)==%d, want 1", b, len(b.Succs))
			}
			if b.Control == nil {
				f.Fatalf("call block %s has no control value", b)
			}
			if !b.Control.Type.IsMemory() {
				f.Fatalf("call block %s has non-memory control value %s", b, b.Control.LongString())
			}
		case BlockCheck:
			if len(b.Succs) != 1 {
				f.Fatalf("check block %s len(Succs)==%d, want 1", b, len(b.Succs))
			}
			if b.Control == nil {
				f.Fatalf("check block %s has no control value", b)
			}
			if !b.Control.Type.IsVoid() {
				f.Fatalf("check block %s has non-void control value %s", b, b.Control.LongString())
			}
		case BlockFirst:
			if len(b.Succs) != 2 {
				f.Fatalf("plain/dead block %s len(Succs)==%d, want 2", b, len(b.Succs))
			}
			if b.Control != nil {
				f.Fatalf("plain/dead block %s has a control value", b)
			}
		}
		if len(b.Succs) > 2 && b.Likely != BranchUnknown {
			// Fixed: format string had four verbs but only three arguments.
			f.Fatalf("likeliness prediction %d for block %s with %d successors", b.Likely, b, len(b.Succs))
		}

		for _, v := range b.Values {
			// Check to make sure argument count makes sense (argLen of -1 indicates
			// variable length args)
			nArgs := opcodeTable[v.Op].argLen
			if nArgs != -1 && int32(len(v.Args)) != nArgs {
				f.Fatalf("value %v has %d args, expected %d", v.LongString(),
					len(v.Args), nArgs)
			}

			// Check to make sure aux values make sense.
			canHaveAux := false
			canHaveAuxInt := false
			switch opcodeTable[v.Op].auxType {
			case auxNone:
			case auxBool, auxInt8, auxInt16, auxInt32, auxInt64, auxFloat:
				canHaveAuxInt = true
			case auxString, auxSym:
				canHaveAux = true
			case auxSymOff, auxSymValAndOff:
				canHaveAuxInt = true
				canHaveAux = true
			default:
				f.Fatalf("unknown aux type for %s", v.Op)
			}
			if !canHaveAux && v.Aux != nil {
				f.Fatalf("value %v has an Aux value %v but shouldn't", v.LongString(), v.Aux)
			}
			if !canHaveAuxInt && v.AuxInt != 0 {
				f.Fatalf("value %v has an AuxInt value %d but shouldn't", v.LongString(), v.AuxInt)
			}

			for _, arg := range v.Args {
				if arg == nil {
					f.Fatalf("value %v has nil arg", v.LongString())
				}
			}

			if valueMark[v.ID] {
				f.Fatalf("value %s appears twice!", v.LongString())
			}
			valueMark[v.ID] = true

			if v.Block != b {
				f.Fatalf("%s.block != %s", v, b)
			}
			if v.Op == OpPhi && len(v.Args) != len(b.Preds) {
				f.Fatalf("phi length %s does not match pred length %d for block %s", v.LongString(), len(b.Preds), b)
			}

			if v.Op == OpAddr {
				if len(v.Args) == 0 {
					f.Fatalf("no args for OpAddr %s", v.LongString())
				}
				if v.Args[0].Op != OpSP && v.Args[0].Op != OpSB {
					f.Fatalf("bad arg to OpAddr %v", v)
				}
			}

			// TODO: check for cycles in values
			// TODO: check type
		}
	}

	// Check to make sure all Blocks referenced are in the function.
	if !blockMark[f.Entry.ID] {
		f.Fatalf("entry block %v is missing", f.Entry)
	}
	for _, b := range f.Blocks {
		for _, c := range b.Preds {
			if !blockMark[c.ID] {
				f.Fatalf("predecessor block %v for %v is missing", c, b)
			}
		}
		for _, c := range b.Succs {
			if !blockMark[c.ID] {
				f.Fatalf("successor block %v for %v is missing", c, b)
			}
		}
	}

	if len(f.Entry.Preds) > 0 {
		f.Fatalf("entry block %s of %s has predecessor(s) %v", f.Entry, f.Name, f.Entry.Preds)
	}

	// Check to make sure all Values referenced are in the function.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			for i, a := range v.Args {
				if !valueMark[a.ID] {
					f.Fatalf("%v, arg %d of %v, is missing", a, i, v)
				}
			}
		}
		if b.Control != nil && !valueMark[b.Control.ID] {
			f.Fatalf("control value for %s is missing: %v", b, b.Control)
		}
	}

	// Freed blocks/values link through succstorage[0]/argstorage[0];
	// nothing live may appear on either free list.
	for b := f.freeBlocks; b != nil; b = b.succstorage[0] {
		if blockMark[b.ID] {
			f.Fatalf("used block b%d in free list", b.ID)
		}
	}
	for v := f.freeValues; v != nil; v = v.argstorage[0] {
		if valueMark[v.ID] {
			f.Fatalf("used value v%d in free list", v.ID)
		}
	}

	// Check to make sure all args dominate uses.
	if f.RegAlloc == nil {
		// Note: regalloc introduces non-dominating args.
		// See TODO in regalloc.go.
		idom := dominators(f)
		sdom := newSparseTree(f, idom)
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for i, arg := range v.Args {
					x := arg.Block
					y := b
					if v.Op == OpPhi {
						// A phi's i'th arg must be available at the
						// end of the i'th predecessor, not in b itself.
						y = b.Preds[i]
					}
					if !domCheck(f, sdom, x, y) {
						f.Fatalf("arg %d of value %s does not dominate, arg=%s", i, v.LongString(), arg.LongString())
					}
				}
			}
			if b.Control != nil && !domCheck(f, sdom, b.Control.Block, b) {
				f.Fatalf("control value %s for %s doesn't dominate", b.Control, b)
			}
		}
	}
}
// domCheck reports whether x dominates y (including x==y).
// Blocks not reachable from the entry are treated as vacuously dominated.
func domCheck(f *Func, sdom sparseTree, x, y *Block) bool {
	if !sdom.isAncestorEq(y, f.Entry) {
		return true // y is unreachable - ignore
	}
	return sdom.isAncestorEq(x, y)
}

View File

@ -0,0 +1,261 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"fmt"
"log"
"runtime"
"strings"
"time"
)
// Compile is the main entry point for this package.
// Compile modifies f so that on return:
//   · all Values in f map to 0 or 1 assembly instructions of the target architecture
//   · the order of f.Blocks is the order to emit the Blocks
//   · the order of b.Values is the order to emit the Values in each Block
//   · f has a non-nil regAlloc field
func Compile(f *Func) {
	// TODO: debugging - set flags to control verbosity of compiler,
	// which phases to dump IR before/after, etc.
	if f.Log() {
		f.Logf("compiling %s\n", f.Name)
	}

	// hook to print function & phase if panic happens
	phaseName := "init"
	defer func() {
		// phaseName is cleared at the end of Compile, so a non-empty
		// value here means we are unwinding a panic from some pass;
		// re-report it with the phase and function for context.
		if phaseName != "" {
			err := recover()
			stack := make([]byte, 16384)
			n := runtime.Stack(stack, false)
			stack = stack[:n]
			f.Fatalf("panic during %s while compiling %s:\n\n%v\n\n%s\n", phaseName, f.Name, err, stack)
		}
	}()

	// Run all the passes
	printFunc(f)
	f.Config.HTML.WriteFunc("start", f)
	checkFunc(f)
	const logMemStats = false
	for _, p := range passes {
		// Skip optional passes when optimization is off.
		if !f.Config.optimize && !p.required {
			continue
		}
		// p is a copy of the passes element; f.pass is reassigned on
		// every iteration, so pointing at the loop variable is safe here.
		f.pass = &p
		phaseName = p.name
		if f.Log() {
			f.Logf("  pass %s begin\n", p.name)
		}
		// TODO: capture logging during this pass, add it to the HTML
		var mStart runtime.MemStats
		if logMemStats || p.mem {
			runtime.ReadMemStats(&mStart)
		}

		tStart := time.Now()
		p.fn(f)
		tEnd := time.Now()

		// Need something less crude than "Log the whole intermediate result".
		if f.Log() || f.Config.HTML != nil {
			time := tEnd.Sub(tStart).Nanoseconds()
			var stats string
			if logMemStats {
				var mEnd runtime.MemStats
				runtime.ReadMemStats(&mEnd)
				nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
				nAllocs := mEnd.Mallocs - mStart.Mallocs
				stats = fmt.Sprintf("[%d ns %d allocs %d bytes]", time, nAllocs, nBytes)
			} else {
				stats = fmt.Sprintf("[%d ns]", time)
			}
			f.Logf("  pass %s end %s\n", p.name, stats)
			printFunc(f)
			f.Config.HTML.WriteFunc(fmt.Sprintf("after %s <span class=\"stats\">%s</span>", phaseName, stats), f)
		}
		if p.time || p.mem {
			// Surround timing information w/ enough context to allow comparisons.
			time := tEnd.Sub(tStart).Nanoseconds()
			if p.time {
				f.logStat("TIME(ns)", time)
			}
			if p.mem {
				var mEnd runtime.MemStats
				runtime.ReadMemStats(&mEnd)
				nBytes := mEnd.TotalAlloc - mStart.TotalAlloc
				nAllocs := mEnd.Mallocs - mStart.Mallocs
				f.logStat("TIME(ns):BYTES:ALLOCS", time, nBytes, nAllocs)
			}
		}
		// Verify invariants after every pass so breakage is attributed
		// to the pass that introduced it.
		checkFunc(f)
	}

	// Squash error printing defer
	phaseName = ""
}
// pass describes one phase of the SSA compiler: its name, the function
// that implements it, and per-pass knobs settable via PhaseOption.
type pass struct {
	name     string
	fn       func(*Func)
	required bool // cannot be disabled; always runs even with optimize off
	disabled bool
	time     bool // report time to run pass
	mem      bool // report mem stats to run pass
	stats    int  // pass reports own "stats" (e.g., branches removed)
	debug    int  // pass performs some debugging. =1 should be in error-testing-friendly Warnl format.
	test     int  // pass-specific ad-hoc option, perhaps useful in development
}
// PhaseOption sets the specified flag in the specified ssa phase,
// returning empty string if this was successful or a string explaining
// the error if it was not. A version of the phase name with "_"
// replaced by " " is also checked for a match.
// See gc/lex.go for dissection of the option string. Example use:
// GO_GCFLAGS=-d=ssa/generic_cse/time,ssa/generic_cse/stats,ssa/generic_cse/debug=3 ./make.bash ...
//
func PhaseOption(phase, flag string, val int) string {
	underphase := strings.Replace(phase, "_", " ", -1)
	for i := range passes {
		// Work on a copy; only commit it back if the option is legal.
		p := passes[i]
		if p.name != phase && p.name != underphase {
			continue
		}
		switch flag {
		case "on":
			p.disabled = val == 0
		case "off":
			p.disabled = val != 0
		case "time":
			p.time = val != 0
		case "mem":
			p.mem = val != 0
		case "debug":
			p.debug = val
		case "stats":
			p.stats = val
		case "test":
			p.test = val
		default:
			return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
		}
		if p.disabled && p.required {
			return fmt.Sprintf("Cannot disable required SSA phase %s using -d=ssa/%s debug option", phase, phase)
		}
		passes[i] = p
		return ""
	}
	return fmt.Sprintf("Did not find a phase matching %s in -d=ssa/... debug option", phase)
}
// list of passes for the compiler, in execution order.
// Ordering requirements between passes are documented (and checked at
// startup) by passOrder below.
var passes = [...]pass{
	// TODO: combine phielim and copyelim into a single pass?
	{name: "early phielim", fn: phielim},
	{name: "early copyelim", fn: copyelim},
	{name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
	{name: "short circuit", fn: shortcircuit},
	{name: "decompose user", fn: decomposeUser, required: true},
	{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
	{name: "opt", fn: opt, required: true},           // TODO: split required rules and optimizing rules
	{name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values
	{name: "opt deadcode", fn: deadcode},             // remove any blocks orphaned during opt
	{name: "generic cse", fn: cse},
	{name: "phiopt", fn: phiopt},
	{name: "nilcheckelim", fn: nilcheckelim},
	{name: "prove", fn: prove},
	{name: "generic deadcode", fn: deadcode},
	{name: "fuse", fn: fuse},
	{name: "dse", fn: dse},
	{name: "tighten", fn: tighten}, // move values closer to their uses
	{name: "lower", fn: lower, required: true},
	{name: "lowered cse", fn: cse},
	{name: "lowered deadcode", fn: deadcode, required: true},
	{name: "checkLower", fn: checkLower, required: true},
	{name: "late phielim", fn: phielim},
	{name: "late copyelim", fn: copyelim},
	{name: "late deadcode", fn: deadcode},
	{name: "critical", fn: critical, required: true}, // remove critical edges
	{name: "likelyadjust", fn: likelyadjust},
	{name: "layout", fn: layout, required: true},       // schedule blocks
	{name: "schedule", fn: schedule, required: true},   // schedule values
	{name: "flagalloc", fn: flagalloc, required: true}, // allocate flags register
	{name: "regalloc", fn: regalloc, required: true},   // allocate int & float registers + stack slots
	{name: "trim", fn: trim},                           // remove empty blocks
}
// Double-check phase ordering constraints.
// This code is intended to document the ordering requirements
// between different phases. It does not override the passes
// list above.
type constraint struct {
	a, b string // a must come before b
}

var passOrder = [...]constraint{
	// prove relies on common-subexpression elimination for maximum benefits.
	{"generic cse", "prove"},
	// deadcode after prove to eliminate all new dead blocks.
	{"prove", "generic deadcode"},
	// common-subexpression before dead-store elim, so that we recognize
	// when two address expressions are the same.
	{"generic cse", "dse"},
	// cse substantially improves nilcheckelim efficacy
	{"generic cse", "nilcheckelim"},
	// allow deadcode to clean up after nilcheckelim
	{"nilcheckelim", "generic deadcode"},
	// nilcheckelim generates sequences of plain basic blocks
	{"nilcheckelim", "fuse"},
	// nilcheckelim relies on opt to rewrite user nil checks
	{"opt", "nilcheckelim"},
	// tighten should happen before lowering to avoid splitting naturally paired instructions such as CMP/SET
	{"tighten", "lower"},
	// tighten will be most effective when as many values have been removed as possible
	{"generic deadcode", "tighten"},
	{"generic cse", "tighten"},
	// don't run optimization pass until we've decomposed builtin objects
	{"decompose builtin", "opt"},
	// don't layout blocks until critical edges have been removed
	{"critical", "layout"},
	// regalloc requires the removal of all critical edges
	{"critical", "regalloc"},
	// regalloc requires all the values in a block to be scheduled
	{"schedule", "regalloc"},
	// checkLower must run after lowering & subsequent dead code elim
	{"lower", "checkLower"},
	{"lowered deadcode", "checkLower"},
	// flagalloc needs instructions to be scheduled.
	{"schedule", "flagalloc"},
	// regalloc needs flags to be allocated first.
	{"flagalloc", "regalloc"},
	// trim needs regalloc to be done first.
	{"regalloc", "trim"},
}
// init verifies at startup that the passes list satisfies every
// constraint in passOrder, panicking on any violation.
func init() {
	for _, c := range passOrder {
		before, after := c.a, c.b
		bi, ai := -1, -1
		for k, p := range passes {
			if p.name == before {
				bi = k
			}
			if p.name == after {
				ai = k
			}
		}
		if bi < 0 {
			log.Panicf("pass %s not found", before)
		}
		if ai < 0 {
			log.Panicf("pass %s not found", after)
		}
		if bi >= ai {
			log.Panicf("passes %s and %s out of order", before, after)
		}
	}
}

View File

@ -0,0 +1,235 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/internal/obj"
"crypto/sha1"
"fmt"
"os"
"strings"
)
// Config holds per-architecture compilation state shared by all
// functions compiled with it: lowering hooks, frontend callbacks,
// and preallocated storage for Values and Blocks.
type Config struct {
	arch       string                     // "amd64", etc.
	IntSize    int64                      // 4 or 8
	PtrSize    int64                      // 4 or 8
	lowerBlock func(*Block) bool          // lowering function
	lowerValue func(*Value, *Config) bool // lowering function
	fe         Frontend                   // callbacks into compiler frontend
	HTML       *HTMLWriter                // html writer, for debugging
	ctxt       *obj.Link                  // Generic arch information
	optimize   bool                       // Do optimization
	curFunc    *Func

	// TODO: more stuff. Compiler flags of interest, ...

	// Given an environment variable used for debug hash match,
	// what file (if any) receives the yes/no logging?
	logfiles map[string]*os.File

	// Storage for low-numbered values and blocks.
	values [2000]Value
	blocks [200]Block

	domblockstore []ID         // scratch space for computing dominators
	scrSparse     []*sparseSet // scratch sparse sets to be re-used.
}
// TypeSource supplies the frontend's representations of the basic Go
// types, plus a query for whether a type can be held in SSA form.
type TypeSource interface {
	TypeBool() Type
	TypeInt8() Type
	TypeInt16() Type
	TypeInt32() Type
	TypeInt64() Type
	TypeUInt8() Type
	TypeUInt16() Type
	TypeUInt32() Type
	TypeUInt64() Type
	TypeInt() Type
	TypeFloat32() Type
	TypeFloat64() Type
	TypeUintptr() Type
	TypeString() Type
	TypeBytePtr() Type // TODO: use unsafe.Pointer instead?

	CanSSA(t Type) bool
}
// Logger is the set of logging and error-reporting callbacks the SSA
// backend needs from the compiler frontend.
type Logger interface {
	// Logf logs a message from the compiler.
	Logf(string, ...interface{})

	// Log returns true if logging is not a no-op
	// some logging calls account for more than a few heap allocations.
	Log() bool

	// Fatal reports a compiler error and exits.
	Fatalf(line int32, msg string, args ...interface{})

	// Unimplemented reports that the function cannot be compiled.
	// It will be removed once SSA work is complete.
	Unimplementedf(line int32, msg string, args ...interface{})

	// Warnl writes compiler messages in the form expected by "errorcheck" tests
	Warnl(line int, fmt_ string, args ...interface{})

	// Forwards the Debug_checknil flag from gc
	Debug_checknil() bool
}
// Frontend is the full interface the SSA backend uses to call back
// into the compiler frontend: types, logging, and symbol/auto support.
type Frontend interface {
	TypeSource
	Logger

	// StringData returns a symbol pointing to the given string's contents.
	StringData(string) interface{} // returns *gc.Sym

	// Auto returns a Node for an auto variable of the given type.
	// The SSA compiler uses this function to allocate space for spills.
	Auto(Type) GCNode

	// Line returns a string describing the given line number.
	Line(int32) string
}
// GCNode is an interface used to hold a *gc.Node. We'd use *gc.Node directly but
// that would lead to an import cycle.
type GCNode interface {
	Typ() Type
	String() string
}
// NewConfig returns a new configuration object for the given architecture.
func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config {
	c := &Config{arch: arch, fe: fe}
	switch arch {
	case "amd64":
		c.IntSize, c.PtrSize = 8, 8
		c.lowerBlock = rewriteBlockAMD64
		c.lowerValue = rewriteValueAMD64
	case "386":
		c.IntSize, c.PtrSize = 4, 4
		c.lowerBlock = rewriteBlockAMD64
		c.lowerValue = rewriteValueAMD64 // TODO(khr): full 32-bit support
	default:
		fe.Unimplementedf(0, "arch %s not implemented", arch)
	}
	c.ctxt = ctxt
	c.optimize = optimize

	// Assign IDs to preallocated values/blocks.
	for i := range c.values {
		c.values[i].ID = ID(i)
	}
	for i := range c.blocks {
		c.blocks[i].ID = ID(i)
	}

	c.logfiles = make(map[string]*os.File)

	return c
}
// Frontend returns the compiler frontend callbacks registered with c.
func (c *Config) Frontend() Frontend { return c.fe }
// NewFunc returns a new, empty function object.
// Caller must call f.Free() before calling NewFunc again.
func (c *Config) NewFunc() *Func {
	// TODO(khr): should this function take name, type, etc. as arguments?
	if c.curFunc != nil {
		c.Fatalf(0, "NewFunc called without previous Free")
	}
	fn := &Func{Config: c, NamedValues: map[LocalSlot][]*Value{}}
	c.curFunc = fn
	return fn
}
// The following methods forward logging and error reporting to the
// registered frontend.
func (c *Config) Logf(msg string, args ...interface{})               { c.fe.Logf(msg, args...) }
func (c *Config) Log() bool                                          { return c.fe.Log() }
func (c *Config) Fatalf(line int32, msg string, args ...interface{}) { c.fe.Fatalf(line, msg, args...) }
func (c *Config) Unimplementedf(line int32, msg string, args ...interface{}) {
	c.fe.Unimplementedf(line, msg, args...)
}
func (c *Config) Warnl(line int, msg string, args ...interface{}) { c.fe.Warnl(line, msg, args...) }
func (c *Config) Debug_checknil() bool                            { return c.fe.Debug_checknil() }
// logDebugHashMatch records that environment variable evname triggered
// compilation of name. Output goes to the file named by the GSHS_LOGFILE
// environment variable, or to standard out if that is empty; the open
// file handle is cached in c.logfiles per evname.
func (c *Config) logDebugHashMatch(evname, name string) {
	file := c.logfiles[evname]
	if file == nil {
		file = os.Stdout
		if tmpfile := os.Getenv("GSHS_LOGFILE"); tmpfile != "" {
			var err error // was misleadingly named "ok"; it holds an error
			file, err = os.Create(tmpfile)
			if err != nil {
				c.Fatalf(0, "Could not open hash-testing logfile %s", tmpfile)
			}
		}
		c.logfiles[evname] = file
	}
	s := fmt.Sprintf("%s triggered %s\n", evname, name)
	// Best-effort logging: write/sync errors are deliberately ignored.
	file.WriteString(s)
	file.Sync()
}
// DebugHashMatch returns true if environment variable evname
// 1) is empty (this is a special more-quickly implemented case of 3)
// 2) is "y" or "Y"
// 3) is a suffix of the sha1 hash of name
// 4) is a suffix of the environment variable
//    fmt.Sprintf("%s%d", evname, n)
// provided that all such variables are nonempty for 0 <= i <= n
// Otherwise it returns false.
// When true is returned the message
//  "%s triggered %s\n", evname, name
// is printed on the file named in environment variable
//  GSHS_LOGFILE
// or standard out if that is empty or there is an error
// opening the file.
func (c *Config) DebugHashMatch(evname, name string) bool {
	want := os.Getenv(evname)
	switch {
	case want == "":
		return true // default behavior with no EV is "on"
	case want == "y" || want == "Y":
		c.logDebugHashMatch(evname, name)
		return true
	case want == "n" || want == "N":
		return false
	}

	// Check the hash of the name against a partial input hash.
	// We use this feature to do a binary search to
	// find a function that is incorrectly compiled.
	hstr := ""
	for _, b := range sha1.Sum([]byte(name)) {
		hstr += fmt.Sprintf("%08b", b)
	}
	if strings.HasSuffix(hstr, want) {
		c.logDebugHashMatch(evname, name)
		return true
	}

	// Iteratively try additional hashes to allow tests for multi-point
	// failure.
	for i := 0; ; i++ {
		ev := fmt.Sprintf("%s%d", evname, i)
		evv := os.Getenv(ev)
		if evv == "" {
			break
		}
		if strings.HasSuffix(hstr, evv) {
			c.logDebugHashMatch(ev, name)
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,60 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// copyelim removes all copies from f: every use of an OpCopy value
// (value args, block controls, and named-value lists) is redirected to
// the copied value. The OpCopy values themselves remain; a subsequent
// deadcode pass deletes them.
func copyelim(f *Func) {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			copyelimValue(v)
		}
		// Resolve the block's control value through any chain of copies.
		v := b.Control
		if v != nil {
			for v.Op == OpCopy {
				v = v.Args[0]
			}
			b.Control = v
		}
	}

	// Update named values.
	for _, name := range f.Names {
		values := f.NamedValues[name]
		for i, v := range values {
			x := v
			for x.Op == OpCopy {
				x = x.Args[0]
			}
			if x != v {
				// Bug fix: store the copy-free value x; the original
				// code wrote v back, which was a no-op self-assignment.
				values[i] = x
			}
		}
	}
}
// copyelimValue replaces each argument of v that is an OpCopy with the
// (transitively) copied value, stopping safely if the copy chain is cyclic.
func copyelimValue(v *Value) {
	// elide any copies generated during rewriting
	for i, a := range v.Args {
		if a.Op != OpCopy {
			continue
		}
		// Rewriting can generate OpCopy loops.
		// They are harmless (see removePredecessor),
		// but take care to stop if we find a cycle.
		// This is tortoise-and-hare cycle detection: a advances every
		// iteration, slow every other; they meet only inside a cycle.
		slow := a // advances every other iteration
		var advance bool
		for a.Op == OpCopy {
			a = a.Args[0]
			if slow == a {
				break
			}
			if advance {
				slow = slow.Args[0]
			}
			advance = !advance
		}
		v.Args[i] = a
	}
}

View File

@ -0,0 +1,39 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// critical splits critical edges (those that go from a block with
// more than one outedge to a block with more than one inedge).
// Regalloc wants a critical-edge-free CFG so it can implement phi values.
func critical(f *Func) {
	for _, b := range f.Blocks {
		if len(b.Preds) <= 1 {
			continue // at most one inedge; no edge into b can be critical
		}

		// Split each incoming edge whose source has multiple outedges.
		for i, p := range b.Preds {
			if p.Kind == BlockPlain {
				continue // only single output block
			}

			// Allocate a new empty block to sit on the edge p -> b.
			mid := f.NewBlock(BlockPlain)
			mid.Line = p.Line

			// Splice it in: p -> mid -> b.
			mid.Preds = append(mid.Preds, p)
			mid.Succs = append(mid.Succs, b)
			b.Preds[i] = mid
			for j, s := range p.Succs {
				if s == b {
					p.Succs[j] = mid
					break
				}
			}
		}
	}
}

View File

@ -0,0 +1,304 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"fmt"
"sort"
)
const (
	// cmpDepth is the maximum recursion depth cmpVal uses when
	// comparing the arguments of two values during initial partitioning.
	cmpDepth = 4
)
// cse does common-subexpression elimination on the Function.
// Values are just relinked, nothing is deleted. A subsequent deadcode
// pass is required to actually remove duplicate expressions.
func cse(f *Func) {
	// Two values are equivalent if they satisfy the following definition:
	// equivalent(v, w):
	//   v.op == w.op
	//   v.type == w.type
	//   v.aux == w.aux
	//   v.auxint == w.auxint
	//   len(v.args) == len(w.args)
	//   v.block == w.block if v.op == OpPhi
	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1

	// The algorithm searches for a partition of f's values into
	// equivalence classes using the above definition.
	// It starts with a coarse partition and iteratively refines it
	// until it reaches a fixed point.

	// Make initial coarse partitions by using a subset of the conditions above.
	a := make([]*Value, 0, f.NumValues())
	auxIDs := auxmap{}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			// Assign each distinct Aux a small dense integer ID so
			// cmpVal can order values by Aux.
			if auxIDs[v.Aux] == 0 {
				auxIDs[v.Aux] = int32(len(auxIDs)) + 1
			}
			if v.Type.IsMemory() {
				continue // memory values can never cse
			}
			if opcodeTable[v.Op].commutative && len(v.Args) == 2 && v.Args[1].ID < v.Args[0].ID {
				// Order the arguments of binary commutative operations.
				v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
			}
			a = append(a, v)
		}
	}
	partition := partitionValues(a, auxIDs)

	// map from value id back to eqclass id
	valueEqClass := make([]ID, f.NumValues())
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			// Use negative equivalence class #s for unique values.
			valueEqClass[v.ID] = -v.ID
		}
	}
	for i, e := range partition {
		if f.pass.debug > 1 && len(e) > 500 {
			fmt.Printf("CSE.large partition (%d): ", len(e))
			for j := 0; j < 3; j++ {
				fmt.Printf("%s ", e[j].LongString())
			}
			fmt.Println()
		}

		for _, v := range e {
			valueEqClass[v.ID] = ID(i)
		}
		if f.pass.debug > 2 && len(e) > 1 {
			fmt.Printf("CSE.partition #%d:", i)
			for _, v := range e {
				fmt.Printf(" %s", v.String())
			}
			fmt.Printf("\n")
		}
	}

	// Find an equivalence class where some members of the class have
	// non-equivalent arguments. Split the equivalence class appropriately.
	// Repeat until we can't find any more splits.
	for {
		changed := false

		// partition can grow in the loop. By not using a range loop here,
		// we process new additions as they arrive, avoiding O(n^2) behavior.
		for i := 0; i < len(partition); i++ {
			e := partition[i]
			v := e[0]
			// all values in this equiv class that are not equivalent to v get moved
			// into another equiv class.
			// To avoid allocating while building that equivalence class,
			// move the values equivalent to v to the beginning of e
			// and other values to the end of e.
			allvals := e
		eqloop:
			for j := 1; j < len(e); {
				w := e[j]
				equivalent := true
				for i := 0; i < len(v.Args); i++ {
					if valueEqClass[v.Args[i].ID] != valueEqClass[w.Args[i].ID] {
						equivalent = false
						break
					}
				}
				if !equivalent || !v.Type.Equal(w.Type) {
					// w is not equivalent to v.
					// move it to the end and shrink e.
					e[j], e[len(e)-1] = e[len(e)-1], e[j]
					e = e[:len(e)-1]
					// The split-off values form a new class numbered
					// len(partition); it is appended below.
					valueEqClass[w.ID] = ID(len(partition))
					changed = true
					continue eqloop
				}
				// v and w are equivalent. Keep w in e.
				j++
			}
			partition[i] = e
			if len(e) < len(allvals) {
				partition = append(partition, allvals[len(e):])
			}
		}

		if !changed {
			break
		}
	}

	// Compute dominator tree
	idom := dominators(f)
	sdom := newSparseTree(f, idom)

	// Compute substitutions we would like to do. We substitute v for w
	// if v and w are in the same equivalence class and v dominates w.
	rewrite := make([]*Value, f.NumValues())
	for _, e := range partition {
		for len(e) > 1 {
			// Find a maximal dominant element in e
			v := e[0]
			for _, w := range e[1:] {
				if sdom.isAncestorEq(w.Block, v.Block) {
					v = w
				}
			}

			// Replace all elements of e which v dominates
			for i := 0; i < len(e); {
				w := e[i]
				if w == v {
					e, e[i] = e[:len(e)-1], e[len(e)-1]
				} else if sdom.isAncestorEq(v.Block, w.Block) {
					rewrite[w.ID] = v
					e, e[i] = e[:len(e)-1], e[len(e)-1]
				} else {
					i++
				}
			}
		}
	}

	rewrites := int64(0)

	// Apply substitutions
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			for i, w := range v.Args {
				if x := rewrite[w.ID]; x != nil {
					v.SetArg(i, x)
					rewrites++
				}
			}
		}
		if v := b.Control; v != nil {
			if x := rewrite[v.ID]; x != nil {
				if v.Op == OpNilCheck {
					// nilcheck pass will remove the nil checks and log
					// them appropriately, so don't mess with them here.
					continue
				}
				b.Control = x
			}
		}
	}
	if f.pass.stats > 0 {
		f.logStat("CSE REWRITES", rewrites)
	}
}
// An eqclass approximates an equivalence class. During the
// algorithm it may represent the union of several of the
// final equivalence classes. Each eqclass is backed by a
// subslice of the sorted value slice built by partitionValues.
type eqclass []*Value
// partitionValues partitions the values into equivalence classes
// based on having all the following features match:
//  - opcode
//  - type
//  - auxint
//  - aux
//  - nargs
//  - block # if a phi op
//  - first two arg's opcodes and auxint
//  - NOT first two arg's aux; that can break CSE.
// partitionValues returns a list of equivalence classes, each
// being a sorted by ID list of *Values. The eqclass slices are
// backed by the same storage as the input slice.
// Equivalence classes of size 1 are ignored.
func partitionValues(a []*Value, auxIDs auxmap) []eqclass {
	sort.Sort(sortvalues{a, auxIDs})

	var classes []eqclass
	for i := 0; i < len(a); {
		// Extend the run [i, end) while values compare equal to a[i].
		first := a[i]
		end := i + 1
		for end < len(a) && cmpVal(first, a[end], auxIDs, cmpDepth) == CMPeq {
			end++
		}
		// Singleton runs carry no CSE opportunity; skip them.
		if end-i > 1 {
			classes = append(classes, a[i:end])
		}
		i = end
	}
	return classes
}
// lt2Cmp converts the result of a less-than comparison into a Cmp:
// CMPlt when isLt holds, CMPgt otherwise.
func lt2Cmp(isLt bool) Cmp {
	if !isLt {
		return CMPgt
	}
	return CMPlt
}
type auxmap map[interface{}]int32
// cmpVal defines a total order on values for the initial CSE sort.
// It compares by opcode, auxint, arg count, block (for phis), type,
// aux (via auxIDs), and finally up to depth levels of arguments.
// It returns CMPeq when the two values look equivalent at this depth.
func cmpVal(v, w *Value, auxIDs auxmap, depth int) Cmp {
	// Try to order these comparison by cost (cheaper first)
	if v.Op != w.Op {
		return lt2Cmp(v.Op < w.Op)
	}
	if v.AuxInt != w.AuxInt {
		return lt2Cmp(v.AuxInt < w.AuxInt)
	}
	if len(v.Args) != len(w.Args) {
		return lt2Cmp(len(v.Args) < len(w.Args))
	}
	// Phi args come from predecessor blocks, so phis in different
	// blocks are never interchangeable.
	if v.Op == OpPhi && v.Block != w.Block {
		return lt2Cmp(v.Block.ID < w.Block.ID)
	}
	if tc := v.Type.Compare(w.Type); tc != CMPeq {
		return tc
	}
	// Order auxes: nil sorts first, then by their assigned IDs.
	if v.Aux != w.Aux {
		if v.Aux == nil {
			return CMPlt
		}
		if w.Aux == nil {
			return CMPgt
		}
		return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux])
	}
	if depth > 0 {
		for i := range v.Args {
			if v.Args[i] == w.Args[i] {
				// skip comparing equal args
				continue
			}
			if ac := cmpVal(v.Args[i], w.Args[i], auxIDs, depth-1); ac != CMPeq {
				return ac
			}
		}
	}
	return CMPeq
}
// Sort values to make the initial partition.
// sortvalues orders values by cmpVal, breaking ties by value ID
// so the sort result is deterministic.
type sortvalues struct {
	a      []*Value // array of values
	auxIDs auxmap   // aux -> aux ID map
}
// sortvalues implements sort.Interface.
func (sv sortvalues) Len() int { return len(sv.a) }
func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
func (sv sortvalues) Less(i, j int) bool {
	v := sv.a[i]
	w := sv.a[j]
	if cmp := cmpVal(v, w, sv.auxIDs, cmpDepth); cmp != CMPeq {
		return cmp == CMPlt
	}
	// Sort by value ID last to keep the sort result deterministic.
	return v.ID < w.ID
}

View File

@ -0,0 +1,123 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "testing"
// tstAux is a dummy Aux value used to give OpArg values
// distinct, comparable auxes in the tests below.
type tstAux struct {
	s string
}
// This tests for a bug found when partitioning, but not sorting by the Aux value.
func TestCSEAuxPartitionBug(t *testing.T) {
	c := testConfig(t)
	arg1Aux := &tstAux{"arg1-aux"}
	arg2Aux := &tstAux{"arg2-aux"}
	arg3Aux := &tstAux{"arg3-aux"}

	// construct lots of values with args that have aux values and place
	// them in an order that triggers the bug
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("start", OpInitMem, TypeMem, 0, nil),
			Valu("sp", OpSP, TypeBytePtr, 0, nil),
			Valu("r7", OpAdd64, TypeInt64, 0, nil, "arg3", "arg1"),
			Valu("r1", OpAdd64, TypeInt64, 0, nil, "arg1", "arg2"),
			Valu("arg1", OpArg, TypeInt64, 0, arg1Aux),
			Valu("arg2", OpArg, TypeInt64, 0, arg2Aux),
			Valu("arg3", OpArg, TypeInt64, 0, arg3Aux),
			Valu("r9", OpAdd64, TypeInt64, 0, nil, "r7", "r8"),
			Valu("r4", OpAdd64, TypeInt64, 0, nil, "r1", "r2"),
			Valu("r8", OpAdd64, TypeInt64, 0, nil, "arg3", "arg2"),
			Valu("r2", OpAdd64, TypeInt64, 0, nil, "arg1", "arg2"),
			Valu("raddr", OpAddr, TypeInt64Ptr, 0, nil, "sp"),
			Valu("raddrdef", OpVarDef, TypeMem, 0, nil, "start"),
			Valu("r6", OpAdd64, TypeInt64, 0, nil, "r4", "r5"),
			Valu("r3", OpAdd64, TypeInt64, 0, nil, "arg1", "arg2"),
			Valu("r5", OpAdd64, TypeInt64, 0, nil, "r2", "r3"),
			Valu("r10", OpAdd64, TypeInt64, 0, nil, "r6", "r9"),
			Valu("rstore", OpStore, TypeMem, 8, nil, "raddr", "r10", "raddrdef"),
			Goto("exit")),
		Bloc("exit",
			Exit("rstore")))

	CheckFunc(fun.f)
	cse(fun.f)
	deadcode(fun.f)
	CheckFunc(fun.f)

	s1Cnt := 2
	// r1 == r2 == r3, needs to remove two of this set
	s2Cnt := 1
	// r4 == r5, needs to remove one of these
	// Values eliminated by cse+deadcode show up with Op == OpInvalid;
	// count them and make sure only the expected ones were removed.
	for k, v := range fun.values {
		if v.Op == OpInvalid {
			switch k {
			case "r1":
				fallthrough
			case "r2":
				fallthrough
			case "r3":
				if s1Cnt == 0 {
					t.Errorf("cse removed all of r1,r2,r3")
				}
				s1Cnt--
			case "r4":
				fallthrough
			case "r5":
				if s2Cnt == 0 {
					t.Errorf("cse removed all of r4,r5")
				}
				s2Cnt--
			default:
				t.Errorf("cse removed %s, but shouldn't have", k)
			}
		}
	}

	if s1Cnt != 0 || s2Cnt != 0 {
		t.Errorf("%d values missed during cse", s1Cnt+s2Cnt)
	}
}
// TestZCSE tests the zero arg cse.
func TestZCSE(t *testing.T) {
	c := testConfig(t)

	// Two identical zero-argument values (sb1/sb2, c1/c2) are created
	// on purpose; zcse should collapse each pair to one.
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("start", OpInitMem, TypeMem, 0, nil),
			Valu("sp", OpSP, TypeBytePtr, 0, nil),
			Valu("sb1", OpSB, TypeBytePtr, 0, nil),
			Valu("sb2", OpSB, TypeBytePtr, 0, nil),
			Valu("addr1", OpAddr, TypeInt64Ptr, 0, nil, "sb1"),
			Valu("addr2", OpAddr, TypeInt64Ptr, 0, nil, "sb2"),
			Valu("a1ld", OpLoad, TypeInt64, 0, nil, "addr1", "start"),
			Valu("a2ld", OpLoad, TypeInt64, 0, nil, "addr2", "start"),
			Valu("c1", OpConst64, TypeInt64, 1, nil),
			Valu("r1", OpAdd64, TypeInt64, 0, nil, "a1ld", "c1"),
			Valu("c2", OpConst64, TypeInt64, 1, nil),
			Valu("r2", OpAdd64, TypeInt64, 0, nil, "a2ld", "c2"),
			Valu("r3", OpAdd64, TypeInt64, 0, nil, "r1", "r2"),
			Valu("raddr", OpAddr, TypeInt64Ptr, 0, nil, "sp"),
			Valu("raddrdef", OpVarDef, TypeMem, 0, nil, "start"),
			Valu("rstore", OpStore, TypeMem, 8, nil, "raddr", "r3", "raddrdef"),
			Goto("exit")),
		Bloc("exit",
			Exit("rstore")))

	CheckFunc(fun.f)
	zcse(fun.f)
	deadcode(fun.f)
	CheckFunc(fun.f)

	if fun.values["c1"].Op != OpInvalid && fun.values["c2"].Op != OpInvalid {
		t.Errorf("zsce should have removed c1 or c2")
	}
	if fun.values["sb1"].Op != OpInvalid && fun.values["sb2"].Op != OpInvalid {
		t.Errorf("zsce should have removed sb1 or sb2")
	}
}

View File

@ -0,0 +1,270 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// findlive returns the reachable blocks and live values in f.
// reachable is indexed by block ID, live by value ID.
func findlive(f *Func) (reachable []bool, live []bool) {
	r := reachableBlocks(f)
	return r, liveValues(f, r)
}
// reachableBlocks returns the reachable blocks in f as a
// slice indexed by block ID, computed by a depth-first walk
// of successor edges starting at the entry block.
func reachableBlocks(f *Func) []bool {
	reachable := make([]bool, f.NumBlocks())
	reachable[f.Entry.ID] = true
	work := []*Block{f.Entry} // stack-like worklist
	for len(work) > 0 {
		// Pop the next reachable block.
		n := len(work) - 1
		b := work[n]
		work = work[:n]
		// A BlockFirst always takes its first successor;
		// its second edge can never be followed.
		succs := b.Succs
		if b.Kind == BlockFirst {
			succs = succs[:1]
		}
		for _, s := range succs {
			if reachable[s.ID] {
				continue
			}
			reachable[s.ID] = true
			work = append(work, s) // push
		}
	}
	return reachable
}
// liveValues returns the live values in f.
// reachable is a map from block ID to whether the block is reachable.
// The result is indexed by value ID.
func liveValues(f *Func, reachable []bool) []bool {
	live := make([]bool, f.NumValues())

	// After regalloc, consider all values to be live.
	// See the comment at the top of regalloc.go and in deadcode for details.
	if f.RegAlloc != nil {
		for i := range live {
			live[i] = true
		}
		return live
	}

	// Find all live values
	var q []*Value // stack-like worklist of unscanned values

	// Starting set: all control values of reachable blocks are live.
	for _, b := range f.Blocks {
		if !reachable[b.ID] {
			continue
		}
		if v := b.Control; v != nil && !live[v.ID] {
			live[v.ID] = true
			q = append(q, v)
		}
	}

	// Compute transitive closure of live values.
	for len(q) > 0 {
		// pop a reachable value
		v := q[len(q)-1]
		q = q[:len(q)-1]
		for i, x := range v.Args {
			// A phi's i'th argument is only used when control arrives
			// via the i'th predecessor; skip args from dead edges.
			if v.Op == OpPhi && !reachable[v.Block.Preds[i].ID] {
				continue
			}
			if !live[x.ID] {
				live[x.ID] = true
				q = append(q, x) // push
			}
		}
	}

	return live
}
// deadcode removes dead code from f: it disconnects and frees
// unreachable blocks, removes never-taken edges from BlockFirst
// blocks, drops dead values, and compacts f.Names/f.NamedValues.
func deadcode(f *Func) {
	// deadcode after regalloc is forbidden for now. Regalloc
	// doesn't quite generate legal SSA which will lead to some
	// required moves being eliminated. See the comment at the
	// top of regalloc.go for details.
	if f.RegAlloc != nil {
		f.Fatalf("deadcode after regalloc")
	}

	// Find reachable blocks.
	reachable := reachableBlocks(f)

	// Get rid of edges from dead to live code.
	for _, b := range f.Blocks {
		if reachable[b.ID] {
			continue
		}
		for _, c := range b.Succs {
			if reachable[c.ID] {
				c.removePred(b)
			}
		}
	}

	// Get rid of dead edges from live code.
	for _, b := range f.Blocks {
		if !reachable[b.ID] {
			continue
		}
		if b.Kind != BlockFirst {
			continue
		}
		// BlockFirst always takes its first successor; the second
		// edge is dead by construction. Drop it and demote to Plain.
		c := b.Succs[1]
		b.Succs[1] = nil
		b.Succs = b.Succs[:1]
		b.Kind = BlockPlain
		b.Likely = BranchUnknown

		if reachable[c.ID] {
			// Note: c must be reachable through some other edge.
			c.removePred(b)
		}
	}

	// Splice out any copies introduced during dead block removal.
	copyelim(f)

	// Find live values.
	live := liveValues(f, reachable)

	// Remove dead & duplicate entries from namedValues map.
	s := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(s)
	// i counts the names kept; f.Names is compacted in place.
	i := 0
	for _, name := range f.Names {
		// j counts values kept for this name; s deduplicates them.
		j := 0
		s.clear()
		values := f.NamedValues[name]
		for _, v := range values {
			if live[v.ID] && !s.contains(v.ID) {
				values[j] = v
				j++
				s.add(v.ID)
			}
		}
		if j == 0 {
			delete(f.NamedValues, name)
		} else {
			f.Names[i] = name
			i++
			// Zero the dropped tail to aid GC.
			for k := len(values) - 1; k >= j; k-- {
				values[k] = nil
			}
			f.NamedValues[name] = values[:j]
		}
	}
	for k := len(f.Names) - 1; k >= i; k-- {
		f.Names[k] = LocalSlot{}
	}
	f.Names = f.Names[:i]

	// Remove dead values from blocks' value list. Return dead
	// values to the allocator.
	for _, b := range f.Blocks {
		i := 0
		for _, v := range b.Values {
			if live[v.ID] {
				b.Values[i] = v
				i++
			} else {
				f.freeValue(v)
			}
		}
		// aid GC
		tail := b.Values[i:]
		for j := range tail {
			tail[j] = nil
		}
		b.Values = b.Values[:i]
	}

	// Remove unreachable blocks. Return dead blocks to allocator.
	i = 0
	for _, b := range f.Blocks {
		if reachable[b.ID] {
			f.Blocks[i] = b
			i++
		} else {
			// Dead values must already have been removed above.
			if len(b.Values) > 0 {
				b.Fatalf("live values in unreachable block %v: %v", b, b.Values)
			}
			f.freeBlock(b)
		}
	}
	// zero remainder to help GC
	tail := f.Blocks[i:]
	for j := range tail {
		tail[j] = nil
	}
	f.Blocks = f.Blocks[:i]
}
// removePred removes the predecessor p from b's predecessor list,
// moving the last predecessor into p's slot, and rewrites b's phi
// ops to match the new (reordered) predecessor list.
// It is a fatal error if p is not a predecessor of b.
func (b *Block) removePred(p *Block) {
	var i int
	found := false
	for j, q := range b.Preds {
		if q == p {
			i = j
			found = true
			break
		}
	}
	// TODO: the above loop could make the deadcode pass take quadratic time
	if !found {
		b.Fatalf("can't find predecessor %v of %v\n", p, b)
	}

	// Swap-remove: the last predecessor takes slot i.
	n := len(b.Preds) - 1
	b.Preds[i] = b.Preds[n]
	b.Preds[n] = nil // aid GC
	b.Preds = b.Preds[:n]

	// rewrite phi ops to match the new predecessor list
	for _, v := range b.Values {
		if v.Op != OpPhi {
			continue
		}
		v.Args[i] = v.Args[n]
		v.Args[n] = nil // aid GC
		v.Args = v.Args[:n]
		phielimValue(v)
		// Note: this is trickier than it looks. Replacing
		// a Phi with a Copy can in general cause problems because
		// Phi and Copy don't have exactly the same semantics.
		// Phi arguments always come from a predecessor block,
		// whereas copies don't. This matters in loops like:
		// 1: x = (Phi y)
		//    y = (Add x 1)
		//    goto 1
		// If we replace Phi->Copy, we get
		// 1: x = (Copy y)
		//    y = (Add x 1)
		//    goto 1
		// (Phi y) refers to the *previous* value of y, whereas
		// (Copy y) refers to the *current* value of y.
		// The modified code has a cycle and the scheduler
		// will barf on it.
		//
		// Fortunately, this situation can only happen for dead
		// code loops. We know the code we're working with is
		// not dead, so we're ok.
		// Proof: If we have a potential bad cycle, we have a
		// situation like this:
		//   x = (Phi z)
		//   y = (op1 x ...)
		//   z = (op2 y ...)
		// Where opX are not Phi ops. But such a situation
		// implies a cycle in the dominator graph. In the
		// example, x.Block dominates y.Block, y.Block dominates
		// z.Block, and z.Block dominates x.Block (treating
		// "dominates" as reflexive). Cycles in the dominator
		// graph can only happen in an unreachable cycle.
	}
}

View File

@ -0,0 +1,134 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "testing"
// TestDeadLoop checks that a self-referential dead loop block,
// and the dead value it controls itself with, are both removed.
func TestDeadLoop(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Goto("exit")),
		Bloc("exit",
			Exit("mem")),
		// dead loop
		Bloc("deadblock",
			// dead value in dead block
			Valu("deadval", OpConstBool, TypeBool, 1, nil),
			If("deadval", "deadblock", "exit")))

	CheckFunc(fun.f)
	Deadcode(fun.f)
	CheckFunc(fun.f)

	for _, b := range fun.f.Blocks {
		if b == fun.blocks["deadblock"] {
			t.Errorf("dead block not removed")
		}
		for _, v := range b.Values {
			if v == fun.values["deadval"] {
				t.Errorf("control value of dead block not removed")
			}
		}
	}
}
// TestDeadValue checks that an unused value in a live block is removed.
func TestDeadValue(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("deadval", OpConst64, TypeInt64, 37, nil),
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	Deadcode(fun.f)
	CheckFunc(fun.f)

	for _, b := range fun.f.Blocks {
		for _, v := range b.Values {
			if v == fun.values["deadval"] {
				t.Errorf("dead value not removed")
			}
		}
	}
}
// TestNeverTaken checks that a branch on a constant-false condition
// is simplified by Opt and the never-taken arm removed by Deadcode.
func TestNeverTaken(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("cond", OpConstBool, TypeBool, 0, nil),
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			If("cond", "then", "else")),
		Bloc("then",
			Goto("exit")),
		Bloc("else",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	Opt(fun.f)
	Deadcode(fun.f)
	CheckFunc(fun.f)

	if fun.blocks["entry"].Kind != BlockPlain {
		t.Errorf("if(false) not simplified")
	}
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["then"] {
			t.Errorf("then block still present")
		}
		for _, v := range b.Values {
			if v == fun.values["cond"] {
				t.Errorf("constant condition still present")
			}
		}
	}
}
// TestNestedDeadBlocks checks that chains of blocks that become
// unreachable only after branch simplification are all removed.
func TestNestedDeadBlocks(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("cond", OpConstBool, TypeBool, 0, nil),
			If("cond", "b2", "b4")),
		Bloc("b2",
			If("cond", "b3", "b4")),
		Bloc("b3",
			If("cond", "b3", "b4")),
		Bloc("b4",
			If("cond", "b3", "exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	Opt(fun.f)
	CheckFunc(fun.f)
	Deadcode(fun.f)
	CheckFunc(fun.f)

	if fun.blocks["entry"].Kind != BlockPlain {
		t.Errorf("if(false) not simplified")
	}
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["b2"] {
			t.Errorf("b2 block still present")
		}
		if b == fun.blocks["b3"] {
			t.Errorf("b3 block still present")
		}
		for _, v := range b.Values {
			if v == fun.values["cond"] {
				t.Errorf("constant condition still present")
			}
		}
	}
}

View File

@ -0,0 +1,116 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// dse does dead-store elimination on the Function.
// Dead stores are those which are unconditionally followed by
// another store to the same location, with no intervening load.
// This implementation only works within a basic block. TODO: use something more global.
func dse(f *Func) {
	var stores []*Value
	loadUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(loadUse)
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	shadowed := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(shadowed)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  loadUse contains stores which are used by a subsequent load.
		//  storeUse contains stores which are used by a subsequent store.
		loadUse.clear()
		storeUse.clear()
		stores = stores[:0]
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// Ignore phis - they will always be first and can't be eliminated
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
						if v.Op != OpStore && v.Op != OpZero && v.Op != OpVarDef && v.Op != OpVarKill {
							// CALL, DUFFCOPY, etc. are both
							// reads and writes.
							loadUse.add(a.ID)
						}
					}
				}
			} else {
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						loadUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			continue
		}

		// find last store in the block
		// (the unique memory-producing value not consumed by another
		// store in this block)
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}

		// Walk backwards looking for dead stores. Keep track of shadowed addresses.
		// An "address" is an SSA Value which encodes both the address and size of
		// the write. This code will not remove dead stores to the same address
		// of different types.
		shadowed.clear()
		v := last

	walkloop:
		if loadUse.contains(v.ID) {
			// Someone might be reading this memory state.
			// Clear all shadowed addresses.
			shadowed.clear()
		}
		if v.Op == OpStore || v.Op == OpZero {
			if shadowed.contains(v.Args[0].ID) {
				// Modify store into a copy
				// (the memory arg passes through; the write is dropped).
				if v.Op == OpStore {
					// store addr value mem
					v.SetArgs1(v.Args[2])
				} else {
					// zero addr mem
					sz := v.Args[0].Type.Elem().Size()
					if v.AuxInt != sz {
						f.Fatalf("mismatched zero/store sizes: %d and %d [%s]",
							v.AuxInt, sz, v.LongString())
					}
					v.SetArgs1(v.Args[1])
				}
				v.Aux = nil
				v.AuxInt = 0
				v.Op = OpCopy
			} else {
				shadowed.add(v.Args[0].ID)
			}
		}
		// walk to previous store
		if v.Op == OpPhi {
			continue // At start of block. Move on to next block.
		}
		for _, a := range v.Args {
			if a.Block == b && a.Type.IsMemory() {
				v = a
				goto walkloop
			}
		}
	}
}

View File

@ -0,0 +1,97 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "testing"
// TestDeadStore checks that stores shadowed by a later store/zero to
// the same address (store1 by store3, zero1 by store4) become copies.
func TestDeadStore(t *testing.T) {
	c := testConfig(t)
	elemType := &TypeImpl{Size_: 8, Name: "testtype"}
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr", Elem_: elemType} // dummy for testing
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("start", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Valu("v", OpConstBool, TypeBool, 1, nil),
			Valu("addr1", OpAddr, ptrType, 0, nil, "sb"),
			Valu("addr2", OpAddr, ptrType, 0, nil, "sb"),
			Valu("addr3", OpAddr, ptrType, 0, nil, "sb"),
			Valu("zero1", OpZero, TypeMem, 8, nil, "addr3", "start"),
			Valu("store1", OpStore, TypeMem, 1, nil, "addr1", "v", "zero1"),
			Valu("store2", OpStore, TypeMem, 1, nil, "addr2", "v", "store1"),
			Valu("store3", OpStore, TypeMem, 1, nil, "addr1", "v", "store2"),
			Valu("store4", OpStore, TypeMem, 1, nil, "addr3", "v", "store3"),
			Goto("exit")),
		Bloc("exit",
			Exit("store3")))

	CheckFunc(fun.f)
	dse(fun.f)
	CheckFunc(fun.f)

	v1 := fun.values["store1"]
	if v1.Op != OpCopy {
		t.Errorf("dead store not removed")
	}

	v2 := fun.values["zero1"]
	if v2.Op != OpCopy {
		t.Errorf("dead store (zero) not removed")
	}
}
func TestDeadStorePhi(t *testing.T) {
	// make sure we don't get into an infinite loop with phi values.
	c := testConfig(t)
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("start", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Valu("v", OpConstBool, TypeBool, 1, nil),
			Valu("addr", OpAddr, ptrType, 0, nil, "sb"),
			Goto("loop")),
		// The memory phi feeds the store which feeds the phi: the
		// backwards walk in dse must stop at the phi, not cycle.
		Bloc("loop",
			Valu("phi", OpPhi, TypeMem, 0, nil, "start", "store"),
			Valu("store", OpStore, TypeMem, 1, nil, "addr", "v", "phi"),
			If("v", "loop", "exit")),
		Bloc("exit",
			Exit("store")))

	CheckFunc(fun.f)
	dse(fun.f)
	CheckFunc(fun.f)
}
func TestDeadStoreTypes(t *testing.T) {
	// Make sure a narrow store can't shadow a wider one. We test an even
	// stronger restriction, that one store can't shadow another unless the
	// types of the address fields are identical (where identicalness is
	// decided by the CSE pass).
	c := testConfig(t)
	t1 := &TypeImpl{Size_: 8, Ptr: true, Name: "t1"}
	t2 := &TypeImpl{Size_: 4, Ptr: true, Name: "t2"}
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("start", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Valu("v", OpConstBool, TypeBool, 1, nil),
			Valu("addr1", OpAddr, t1, 0, nil, "sb"),
			Valu("addr2", OpAddr, t2, 0, nil, "sb"),
			Valu("store1", OpStore, TypeMem, 1, nil, "addr1", "v", "start"),
			Valu("store2", OpStore, TypeMem, 1, nil, "addr2", "v", "store1"),
			Goto("exit")),
		Bloc("exit",
			Exit("store2")))

	CheckFunc(fun.f)
	cse(fun.f)
	dse(fun.f)
	CheckFunc(fun.f)

	v := fun.values["store1"]
	if v.Op == OpCopy {
		t.Errorf("store %s incorrectly removed", v)
	}
}

View File

@ -0,0 +1,261 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// decompose converts phi ops on compound builtin types into phi
// ops on simple types.
// (The remaining compound ops are decomposed with rewrite rules.)
func decomposeBuiltIn(f *Func) {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpPhi {
				continue
			}
			decomposeBuiltInPhi(v)
		}
	}

	// Split up named values into their components.
	// NOTE: the component values we are making are dead at this point.
	// We must do the opt pass before any deadcode elimination or we will
	// lose the name->value correspondence.
	for _, name := range f.Names {
		t := name.Type
		switch {
		case t.IsComplex():
			var elemType Type
			if t.Size() == 16 {
				elemType = f.Config.fe.TypeFloat64()
			} else {
				elemType = f.Config.fe.TypeFloat32()
			}
			// Real part at the original offset, imaginary part after it.
			rName := LocalSlot{name.N, elemType, name.Off}
			iName := LocalSlot{name.N, elemType, name.Off + elemType.Size()}
			f.Names = append(f.Names, rName, iName)
			for _, v := range f.NamedValues[name] {
				r := v.Block.NewValue1(v.Line, OpComplexReal, elemType, v)
				i := v.Block.NewValue1(v.Line, OpComplexImag, elemType, v)
				f.NamedValues[rName] = append(f.NamedValues[rName], r)
				f.NamedValues[iName] = append(f.NamedValues[iName], i)
			}
		case t.IsString():
			// A string is a (pointer, length) pair.
			ptrType := f.Config.fe.TypeBytePtr()
			lenType := f.Config.fe.TypeInt()
			ptrName := LocalSlot{name.N, ptrType, name.Off}
			lenName := LocalSlot{name.N, lenType, name.Off + f.Config.PtrSize}
			f.Names = append(f.Names, ptrName, lenName)
			for _, v := range f.NamedValues[name] {
				ptr := v.Block.NewValue1(v.Line, OpStringPtr, ptrType, v)
				len := v.Block.NewValue1(v.Line, OpStringLen, lenType, v)
				f.NamedValues[ptrName] = append(f.NamedValues[ptrName], ptr)
				f.NamedValues[lenName] = append(f.NamedValues[lenName], len)
			}
		case t.IsSlice():
			// A slice is a (pointer, length, capacity) triple.
			ptrType := f.Config.fe.TypeBytePtr()
			lenType := f.Config.fe.TypeInt()
			ptrName := LocalSlot{name.N, ptrType, name.Off}
			lenName := LocalSlot{name.N, lenType, name.Off + f.Config.PtrSize}
			capName := LocalSlot{name.N, lenType, name.Off + 2*f.Config.PtrSize}
			f.Names = append(f.Names, ptrName, lenName, capName)
			for _, v := range f.NamedValues[name] {
				ptr := v.Block.NewValue1(v.Line, OpSlicePtr, ptrType, v)
				len := v.Block.NewValue1(v.Line, OpSliceLen, lenType, v)
				cap := v.Block.NewValue1(v.Line, OpSliceCap, lenType, v)
				f.NamedValues[ptrName] = append(f.NamedValues[ptrName], ptr)
				f.NamedValues[lenName] = append(f.NamedValues[lenName], len)
				f.NamedValues[capName] = append(f.NamedValues[capName], cap)
			}
		case t.IsInterface():
			// An interface is an (itab, data) pair.
			ptrType := f.Config.fe.TypeBytePtr()
			typeName := LocalSlot{name.N, ptrType, name.Off}
			dataName := LocalSlot{name.N, ptrType, name.Off + f.Config.PtrSize}
			f.Names = append(f.Names, typeName, dataName)
			for _, v := range f.NamedValues[name] {
				typ := v.Block.NewValue1(v.Line, OpITab, ptrType, v)
				data := v.Block.NewValue1(v.Line, OpIData, ptrType, v)
				f.NamedValues[typeName] = append(f.NamedValues[typeName], typ)
				f.NamedValues[dataName] = append(f.NamedValues[dataName], data)
			}
		case t.Size() > f.Config.IntSize:
			f.Unimplementedf("undecomposed named type %s", t)
		}
	}
}
// decomposeBuiltInPhi dispatches a single phi value to the
// per-type decomposition routine for its builtin compound type.
// Types no wider than the machine int are left alone.
func decomposeBuiltInPhi(v *Value) {
	// TODO: decompose 64-bit ops on 32-bit archs?
	switch {
	case v.Type.IsComplex():
		decomposeComplexPhi(v)
	case v.Type.IsString():
		decomposeStringPhi(v)
	case v.Type.IsSlice():
		decomposeSlicePhi(v)
	case v.Type.IsInterface():
		decomposeInterfacePhi(v)
	case v.Type.Size() > v.Block.Func.Config.IntSize:
		v.Unimplementedf("undecomposed type %s", v.Type)
	}
}
// decomposeStringPhi rewrites a string-typed phi into separate phis
// for the pointer and length components, then turns v into an
// OpStringMake of those component phis.
func decomposeStringPhi(v *Value) {
	fe := v.Block.Func.Config.fe
	tPtr := fe.TypeBytePtr()
	tLen := fe.TypeInt()

	phiPtr := v.Block.NewValue0(v.Line, OpPhi, tPtr)
	phiLen := v.Block.NewValue0(v.Line, OpPhi, tLen)
	for _, a := range v.Args {
		phiPtr.AddArg(a.Block.NewValue1(v.Line, OpStringPtr, tPtr, a))
		phiLen.AddArg(a.Block.NewValue1(v.Line, OpStringLen, tLen, a))
	}
	v.reset(OpStringMake)
	v.AddArg(phiPtr)
	v.AddArg(phiLen)
}
// decomposeSlicePhi rewrites a slice-typed phi into separate phis
// for the pointer, length, and capacity components, then turns v
// into an OpSliceMake of those component phis.
func decomposeSlicePhi(v *Value) {
	fe := v.Block.Func.Config.fe
	tPtr := fe.TypeBytePtr()
	tInt := fe.TypeInt()

	phiPtr := v.Block.NewValue0(v.Line, OpPhi, tPtr)
	phiLen := v.Block.NewValue0(v.Line, OpPhi, tInt)
	phiCap := v.Block.NewValue0(v.Line, OpPhi, tInt)
	for _, a := range v.Args {
		phiPtr.AddArg(a.Block.NewValue1(v.Line, OpSlicePtr, tPtr, a))
		phiLen.AddArg(a.Block.NewValue1(v.Line, OpSliceLen, tInt, a))
		phiCap.AddArg(a.Block.NewValue1(v.Line, OpSliceCap, tInt, a))
	}
	v.reset(OpSliceMake)
	v.AddArg(phiPtr)
	v.AddArg(phiLen)
	v.AddArg(phiCap)
}
// decomposeComplexPhi rewrites a complex-typed phi into separate phis
// for the real and imaginary parts, then turns v into an
// OpComplexMake of those component phis.
func decomposeComplexPhi(v *Value) {
	fe := v.Block.Func.Config.fe
	var elemT Type
	switch sz := v.Type.Size(); sz {
	case 8:
		elemT = fe.TypeFloat32()
	case 16:
		elemT = fe.TypeFloat64()
	default:
		v.Fatalf("decomposeComplexPhi: bad complex size %d", sz)
	}

	phiRe := v.Block.NewValue0(v.Line, OpPhi, elemT)
	phiIm := v.Block.NewValue0(v.Line, OpPhi, elemT)
	for _, a := range v.Args {
		phiRe.AddArg(a.Block.NewValue1(v.Line, OpComplexReal, elemT, a))
		phiIm.AddArg(a.Block.NewValue1(v.Line, OpComplexImag, elemT, a))
	}
	v.reset(OpComplexMake)
	v.AddArg(phiRe)
	v.AddArg(phiIm)
}
// decomposeInterfacePhi rewrites an interface-typed phi into separate
// phis for the itab and data words, then turns v into an OpIMake of
// those component phis.
func decomposeInterfacePhi(v *Value) {
	tPtr := v.Block.Func.Config.fe.TypeBytePtr()

	phiTab := v.Block.NewValue0(v.Line, OpPhi, tPtr)
	phiData := v.Block.NewValue0(v.Line, OpPhi, tPtr)
	for _, a := range v.Args {
		phiTab.AddArg(a.Block.NewValue1(v.Line, OpITab, tPtr, a))
		phiData.AddArg(a.Block.NewValue1(v.Line, OpIData, tPtr, a))
	}
	v.reset(OpIMake)
	v.AddArg(phiTab)
	v.AddArg(phiData)
}
// decomposeUser converts phi ops on user-defined compound types
// (currently structs) into phi ops on their fields, and splits
// struct entries in f.Names into per-field named slots.
func decomposeUser(f *Func) {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpPhi {
				continue
			}
			decomposeUserPhi(v)
		}
	}
	// Split up named values into their components.
	// NOTE: the component values we are making are dead at this point.
	// We must do the opt pass before any deadcode elimination or we will
	// lose the name->value correspondence.
	// i counts names kept; f.Names is compacted in place, dropping
	// struct names whose fields replace them.
	i := 0
	for _, name := range f.Names {
		t := name.Type
		switch {
		case t.IsStruct():
			n := t.NumFields()
			for _, v := range f.NamedValues[name] {
				// Note: this inner i (a field index) shadows the outer
				// name-compaction counter.
				for i := int64(0); i < n; i++ {
					fname := LocalSlot{name.N, t.FieldType(i), name.Off + t.FieldOff(i)} // TODO: use actual field name?
					x := v.Block.NewValue1I(v.Line, OpStructSelect, t.FieldType(i), i, v)
					f.NamedValues[fname] = append(f.NamedValues[fname], x)
				}
			}
			delete(f.NamedValues, name)
		default:
			f.Names[i] = name
			i++
		}
	}
	f.Names = f.Names[:i]
}
// decomposeUserPhi decomposes a single phi of user-defined compound
// type; currently only structs are handled.
func decomposeUserPhi(v *Value) {
	switch {
	case v.Type.IsStruct():
		decomposeStructPhi(v)
	}
	// TODO: Arrays of length 1?
}
// decomposeStructPhi rewrites a struct-typed phi into one phi per
// field, turns v into the appropriate OpStructMakeN of those phis,
// and recursively decomposes any field phis that are themselves structs.
func decomposeStructPhi(v *Value) {
	t := v.Type
	n := t.NumFields()

	var parts [MaxStruct]*Value
	for i := int64(0); i < n; i++ {
		parts[i] = v.Block.NewValue0(v.Line, OpPhi, t.FieldType(i))
	}
	for _, a := range v.Args {
		for i := int64(0); i < n; i++ {
			parts[i].AddArg(a.Block.NewValue1I(v.Line, OpStructSelect, t.FieldType(i), i, a))
		}
	}
	v.reset(StructMakeOp(n))
	v.AddArgs(parts[:n]...)

	// Recursively decompose phis for each field.
	for _, p := range parts[:n] {
		if p.Type.IsStruct() {
			decomposeStructPhi(p)
		}
	}
}
// MaxStruct is the maximum number of fields a struct
// can have and still be SSAable. It bounds the fields
// array in decomposeStructPhi and the opcodes accepted
// by StructMakeOp.
const MaxStruct = 4
// StructMakeOp returns the opcode to construct a struct with the
// given number of fields. It panics if nf is outside [0, MaxStruct].
func StructMakeOp(nf int64) Op {
	ops := [...]Op{OpStructMake0, OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4}
	if nf < 0 || nf >= int64(len(ops)) {
		panic("too many fields in an SSAable struct")
	}
	return ops[nf]
}

View File

@ -0,0 +1,367 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// mark values: traversal states for a block during the DFS in
// postorder (and, for notFound/notExplored, in dfs below).
const (
	notFound = 0 // block has not been discovered yet
	notExplored = 1 // discovered and in queue, outedges not processed yet
	explored = 2 // discovered and in queue, outedges processed
	done = 3 // all done, in output ordering
)
// This file contains code to compute the dominator tree
// of a control-flow graph.
// postorder computes a postorder traversal ordering for the
// basic blocks in f. Unreachable blocks will not appear.
func postorder(f *Func) []*Block {
	state := make([]byte, f.NumBlocks())

	var result []*Block // blocks in postorder
	var stack []*Block  // DFS stack
	stack = append(stack, f.Entry)
	state[f.Entry.ID] = notExplored
	for len(stack) > 0 {
		b := stack[len(stack)-1]
		switch state[b.ID] {
		case notExplored:
			// First visit: mark explored and push any
			// successors we haven't seen yet.
			state[b.ID] = explored
			for _, c := range b.Succs {
				if state[c.ID] == notFound {
					state[c.ID] = notExplored
					stack = append(stack, c)
				}
			}
		case explored:
			// Second visit: all children done. Pop & output block.
			stack = stack[:len(stack)-1]
			state[b.ID] = done
			result = append(result, b)
		default:
			b.Fatalf("bad stack state %v %d", b, state[b.ID])
		}
	}
	return result
}
type linkedBlocks func(*Block) []*Block
// nscratchslices is the number of scratch ID slices handed out
// by scratchBlocksForDom.
const nscratchslices = 8

// experimentally, functions with 512 or fewer blocks account
// for 75% of memory (size) allocation for dominator computation
// in make.bash.
const minscratchblocks = 512
// scratchBlocksForDom carves eight zeroed scratch slices of length
// maxBlockID out of a single cached backing array on cfg, growing
// (with 50% headroom) or clearing the cache as needed. The slices
// are reused across dominator computations to avoid reallocation.
func (cfg *Config) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g, h []ID) {
	tot := maxBlockID * nscratchslices
	scratch := cfg.domblockstore
	if len(scratch) < tot {
		// req = min(1.5*tot, nscratchslices*minscratchblocks)
		// 50% padding allows for graph growth in later phases.
		req := (tot * 3) >> 1
		if req < nscratchslices*minscratchblocks {
			req = nscratchslices * minscratchblocks
		}
		scratch = make([]ID, req)
		cfg.domblockstore = scratch
	} else {
		// Clear as much of scratch as we will (re)use
		scratch = scratch[0:tot]
		for i := range scratch {
			scratch[i] = 0
		}
	}

	a = scratch[0*maxBlockID : 1*maxBlockID]
	b = scratch[1*maxBlockID : 2*maxBlockID]
	c = scratch[2*maxBlockID : 3*maxBlockID]
	d = scratch[3*maxBlockID : 4*maxBlockID]
	e = scratch[4*maxBlockID : 5*maxBlockID]
	f = scratch[5*maxBlockID : 6*maxBlockID]
	g = scratch[6*maxBlockID : 7*maxBlockID]
	h = scratch[7*maxBlockID : 8*maxBlockID]

	return
}
// dfs performs a depth first search over the blocks starting at the set of
// blocks in the entries list (in arbitrary order). dfnum contains a mapping
// from block id to an int indicating the order the block was reached or
// notFound if the block was not reached.  order contains a mapping from dfnum
// to block.
// parent records, for each visited block, the block it was pushed from;
// each entry is its own parent. fromID maps every block ID in the
// function back to its *Block.
func (f *Func) dfs(entries []*Block, succFn linkedBlocks, dfnum, order, parent []ID) (fromID []*Block) {
	maxBlockID := entries[0].Func.NumBlocks()

	fromID = make([]*Block, maxBlockID)
	for _, entry := range entries[0].Func.Blocks {
		eid := entry.ID
		if fromID[eid] != nil {
			panic("Colliding entry IDs")
		}
		fromID[eid] = entry
	}

	n := ID(0)
	s := make([]*Block, 0, 256)
	for _, entry := range entries {
		if dfnum[entry.ID] != notFound {
			continue // already found from a previous entry
		}
		s = append(s, entry)
		parent[entry.ID] = entry.ID
		for len(s) > 0 {
			node := s[len(s)-1]
			s = s[:len(s)-1]

			n++
			for _, w := range succFn(node) {
				// if it has a dfnum, we've already visited it
				if dfnum[w.ID] == notFound {
					s = append(s, w)
					parent[w.ID] = node.ID
					dfnum[w.ID] = notExplored
				}
			}
			// Number the node when it is popped, after queuing
			// its unvisited successors.
			dfnum[node.ID] = n
			order[n] = node.ID
		}
	}

	return
}
// dominators computes the dominator tree for f. It returns a slice
// which maps block ID to the immediate dominator of that block.
// Unreachable blocks map to nil. The entry block maps to nil.
func dominators(f *Func) []*Block {
	//TODO: benchmark and try to find criteria for swapping between
	// dominatorsSimple and dominatorsLT
	return f.dominatorsLT(
		[]*Block{f.Entry},
		func(b *Block) []*Block { return b.Preds },
		func(b *Block) []*Block { return b.Succs },
	)
}
// postDominators computes the post-dominator tree for f by running
// the dominator algorithm over the reversed CFG, rooted at every
// exiting block. If the function has no blocks it returns nil; if it
// has no exit blocks (an infinite loop), every block maps to nil.
func postDominators(f *Func) []*Block {
	preds := func(b *Block) []*Block { return b.Preds }
	succs := func(b *Block) []*Block { return b.Succs }

	if len(f.Blocks) == 0 {
		return nil
	}

	// find the exit blocks
	var exits []*Block
	for i := len(f.Blocks) - 1; i >= 0; i-- {
		switch f.Blocks[i].Kind {
		case BlockExit, BlockRet, BlockRetJmp, BlockCall, BlockCheck:
			// Note: no break here. In Go a break would only exit the
			// switch (a no-op), not the loop; we want to examine every
			// block, since a function may have several exits.
			exits = append(exits, f.Blocks[i])
		}
	}

	// infinite loop with no exit
	if exits == nil {
		return make([]*Block, f.NumBlocks())
	}
	// Post-dominators are dominators on the reversed graph:
	// walk backwards (succs as "preds", preds as "succs").
	return f.dominatorsLT(exits, succs, preds)
}
// dominatorsLT runs Lengauer-Tarjan to compute a dominator tree starting at
// entry and using predFn/succFn to find predecessors/successors to allow
// computing both dominator and post-dominator trees.
func (f *Func) dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) []*Block {
	// Based on Lengauer-Tarjan from Modern Compiler Implementation in C -
	// Appel with optimizations from Finding Dominators in Practice -
	// Georgiadis
	maxBlockID := entries[0].Func.NumBlocks()
	// Scratch slices (each of length maxBlockID) come from the config so
	// repeated calls avoid reallocation; the commented-out makes below
	// show what each one conceptually is.
	dfnum, vertex, parent, semi, samedom, ancestor, best, bucket := f.Config.scratchBlocksForDom(maxBlockID)
	// dfnum := make([]ID, maxBlockID) // conceptually int32, but punning for allocation purposes.
	// vertex := make([]ID, maxBlockID)
	// parent := make([]ID, maxBlockID)
	// semi := make([]ID, maxBlockID)
	// samedom := make([]ID, maxBlockID)
	// ancestor := make([]ID, maxBlockID)
	// best := make([]ID, maxBlockID)
	// bucket := make([]ID, maxBlockID)
	// Step 1. Carry out a depth first search of the problem graph. Number
	// the vertices from 1 to n as they are reached during the search.
	fromID := f.dfs(entries, succFn, dfnum, vertex, parent)
	idom := make([]*Block, maxBlockID)
	// Step 2. Compute the semidominators of all vertices by applying
	// Theorem 4. Carry out the computation vertex by vertex in decreasing
	// order by number.
	for i := maxBlockID - 1; i > 0; i-- {
		w := vertex[i]
		if w == 0 {
			continue
		}
		if dfnum[w] == notFound {
			// skip unreachable node
			continue
		}
		// Step 3. Implicitly define the immediate dominator of each
		// vertex by applying Corollary 1. (reordered)
		// bucket lists are threaded through the bucket slice itself.
		for v := bucket[w]; v != 0; v = bucket[v] {
			u := eval(v, ancestor, semi, dfnum, best)
			if semi[u] == semi[v] {
				idom[v] = fromID[w] // true dominator
			} else {
				samedom[v] = u // v has same dominator as u
			}
		}
		p := parent[w]
		s := p // semidominator
		var sp ID
		// calculate the semidominator of w
		for _, v := range predFn(fromID[w]) {
			if dfnum[v.ID] == notFound {
				// skip unreachable predecessor
				continue
			}
			if dfnum[v.ID] <= dfnum[w] {
				sp = v.ID
			} else {
				sp = semi[eval(v.ID, ancestor, semi, dfnum, best)]
			}
			if dfnum[sp] < dfnum[s] {
				s = sp
			}
		}
		// link w into the forest used by eval.
		ancestor[w] = p
		best[w] = w
		semi[w] = s
		if semi[s] != parent[s] {
			// idom[w] isn't known yet; defer it by threading w
			// onto s's bucket list.
			bucket[w] = bucket[s]
			bucket[s] = w
		}
	}
	// Final pass of step 3
	for v := bucket[0]; v != 0; v = bucket[v] {
		idom[v] = fromID[bucket[0]]
	}
	// Step 4. Explictly define the immediate dominator of each vertex,
	// carrying out the computation vertex by vertex in increasing order by
	// number.
	for i := 1; i < maxBlockID-1; i++ {
		w := vertex[i]
		if w == 0 {
			continue
		}
		// w has the same dominator as samedom[w]
		if samedom[w] != 0 {
			idom[w] = idom[samedom[w]]
		}
	}
	return idom
}
// eval function from LT paper with path compression
func eval(v ID, ancestor []ID, semi []ID, dfnum []ID, best []ID) ID {
	a := ancestor[v]
	if ancestor[a] == 0 {
		// a is a forest root; best[v] is already correct.
		return best[v]
	}
	// Recursively compress the ancestor path, then pull the best
	// candidate found along it down to v.
	b := eval(a, ancestor, semi, dfnum, best)
	ancestor[v] = ancestor[a]
	if dfnum[semi[b]] < dfnum[semi[best[v]]] {
		best[v] = b
	}
	return best[v]
}
// dominatorsSimple computes the dominator tree for f. It returns a slice
// which maps block ID to the immediate dominator of that block.
// Unreachable blocks map to nil. The entry block maps to nil.
func dominatorsSimple(f *Func) []*Block {
	// A simple algorithm for now
	// Cooper, Harvey, Kennedy
	idom := make([]*Block, f.NumBlocks())
	// Compute postorder walk
	post := postorder(f)
	// Make map from block id to order index (for intersect call)
	postnum := make([]int, f.NumBlocks())
	for i, b := range post {
		postnum[b.ID] = i
	}
	// Make the entry block a self-loop
	idom[f.Entry.ID] = f.Entry
	if postnum[f.Entry.ID] != len(post)-1 {
		f.Fatalf("entry block %v not last in postorder", f.Entry)
	}
	// Compute relaxation of idom entries
	for {
		changed := false
		// Iterate in reverse postorder so predecessors tend to be
		// processed before their successors.
		for i := len(post) - 2; i >= 0; i-- {
			b := post[i]
			var d *Block
			for _, p := range b.Preds {
				if idom[p.ID] == nil {
					// predecessor has no idom yet; skip it
					continue
				}
				if d == nil {
					d = p
					continue
				}
				d = intersect(d, p, postnum, idom)
			}
			if d != idom[b.ID] {
				idom[b.ID] = d
				changed = true
			}
		}
		if !changed {
			break // fixed point reached
		}
	}
	// Set idom of entry block to nil instead of itself.
	idom[f.Entry.ID] = nil
	return idom
}
// intersect finds the closest dominator of both b and c.
// It requires a postorder numbering of all the blocks.
func intersect(b, c *Block, postnum []int, idom []*Block) *Block {
	// TODO: This loop is O(n^2). See BenchmarkNilCheckDeep*.
	for {
		if b == c {
			return b
		}
		// Walk the lower-numbered node up its dominator chain.
		if postnum[b.ID] < postnum[c.ID] {
			b = idom[b.ID]
		} else {
			c = idom[c.ID]
		}
	}
}

View File

@ -0,0 +1,422 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "testing"
// Benchmarks for dominator computation over several synthetic CFG
// shapes, each with 10000 blocks.
func BenchmarkDominatorsLinear(b *testing.B) { benchmarkDominators(b, 10000, genLinear) }
func BenchmarkDominatorsFwdBack(b *testing.B) { benchmarkDominators(b, 10000, genFwdBack) }
func BenchmarkDominatorsManyPred(b *testing.B) { benchmarkDominators(b, 10000, genManyPred) }
func BenchmarkDominatorsMaxPred(b *testing.B) { benchmarkDominators(b, 10000, genMaxPred) }
func BenchmarkDominatorsMaxPredVal(b *testing.B) { benchmarkDominators(b, 10000, genMaxPredValue) }
// blockGen produces the block descriptions for a synthetic CFG of the
// given size.
type blockGen func(size int) []bloc
// genLinear creates an array of blocks that succeed one another
// b_n -> [b_n+1].
func genLinear(size int) []bloc {
	blocs := make([]bloc, 0, size+3)
	blocs = append(blocs, Bloc("entry",
		Valu("mem", OpInitMem, TypeMem, 0, nil),
		Goto(blockn(0)),
	))
	// Chain each numbered block straight to the next one.
	for i := 0; i < size; i++ {
		blocs = append(blocs, Bloc(blockn(i), Goto(blockn(i+1))))
	}
	blocs = append(blocs, Bloc(blockn(size), Goto("exit")))
	blocs = append(blocs, Bloc("exit", Exit("mem")))
	return blocs
}
// genFwdBack creates an array of blocks that alternate between
// b_n -> [b_n+1], b_n -> [b_n+1, b_n-1] , b_n -> [b_n+1, b_n+2]
func genFwdBack(size int) []bloc {
	var blocs []bloc
	blocs = append(blocs,
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 1, nil),
			Goto(blockn(0)),
		),
	)
	for i := 0; i < size; i++ {
		switch i % 2 {
		case 0:
			// even blocks branch forward (to i+1 or i+2)
			blocs = append(blocs, Bloc(blockn(i),
				If("p", blockn(i+1), blockn(i+2))))
		case 1:
			// odd blocks branch forward or backward (to i+1 or i-1)
			blocs = append(blocs, Bloc(blockn(i),
				If("p", blockn(i+1), blockn(i-1))))
		}
	}
	blocs = append(blocs,
		Bloc(blockn(size), Goto("exit")),
		Bloc("exit", Exit("mem")),
	)
	return blocs
}
// genManyPred creates an array of blocks where 1/3rd have a successor of the
// first block, 1/3rd the last block, and the remaining third are plain.
func genManyPred(size int) []bloc {
	var blocs []bloc
	blocs = append(blocs,
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 1, nil),
			Goto(blockn(0)),
		),
	)
	// We want predecessor lists to be long, so 2/3rds of the blocks have a
	// successor of the first or last block.
	for i := 0; i < size; i++ {
		switch i % 3 {
		case 0:
			// plain block
			blocs = append(blocs, Bloc(blockn(i),
				Valu("a", OpConstBool, TypeBool, 1, nil),
				Goto(blockn(i+1))))
		case 1:
			// also branches back to the first numbered block
			blocs = append(blocs, Bloc(blockn(i),
				Valu("a", OpConstBool, TypeBool, 1, nil),
				If("p", blockn(i+1), blockn(0))))
		case 2:
			// also branches forward to the last numbered block
			blocs = append(blocs, Bloc(blockn(i),
				Valu("a", OpConstBool, TypeBool, 1, nil),
				If("p", blockn(i+1), blockn(size))))
		}
	}
	blocs = append(blocs,
		Bloc(blockn(size), Goto("exit")),
		Bloc("exit", Exit("mem")),
	)
	return blocs
}
// genMaxPred maximizes the size of the 'exit' predecessor list.
func genMaxPred(size int) []bloc {
	blocs := make([]bloc, 0, size+3)
	blocs = append(blocs, Bloc("entry",
		Valu("mem", OpInitMem, TypeMem, 0, nil),
		Valu("p", OpConstBool, TypeBool, 1, nil),
		Goto(blockn(0)),
	))
	// Every numbered block conditionally jumps straight to "exit",
	// so "exit" collects a predecessor from each of them.
	for i := 0; i < size; i++ {
		blocs = append(blocs, Bloc(blockn(i), If("p", blockn(i+1), "exit")))
	}
	blocs = append(blocs, Bloc(blockn(size), Goto("exit")))
	blocs = append(blocs, Bloc("exit", Exit("mem")))
	return blocs
}
// genMaxPredValue is identical to genMaxPred but contains an
// additional value.
func genMaxPredValue(size int) []bloc {
	blocs := make([]bloc, 0, size+3)
	blocs = append(blocs, Bloc("entry",
		Valu("mem", OpInitMem, TypeMem, 0, nil),
		Valu("p", OpConstBool, TypeBool, 1, nil),
		Goto(blockn(0)),
	))
	// As in genMaxPred, every numbered block is a predecessor of "exit",
	// but each also defines a value.
	for i := 0; i < size; i++ {
		blocs = append(blocs, Bloc(blockn(i),
			Valu("a", OpConstBool, TypeBool, 1, nil),
			If("p", blockn(i+1), "exit")))
	}
	blocs = append(blocs, Bloc(blockn(size), Goto("exit")))
	blocs = append(blocs, Bloc("exit", Exit("mem")))
	return blocs
}
// domBenchRes is a package-level sink so the compiler cannot discard
// the benchmarked dominators call.
var domBenchRes []*Block
// benchmarkDominators times dominator computation on a CFG of the
// given size produced by bg.
func benchmarkDominators(b *testing.B, size int, bg blockGen) {
	c := NewConfig("amd64", DummyFrontend{b}, nil, true)
	fun := Fun(c, "entry", bg(size)...)
	CheckFunc(fun.f)
	b.SetBytes(int64(size))
	b.ResetTimer() // exclude CFG construction from the measurement
	for i := 0; i < b.N; i++ {
		domBenchRes = dominators(fun.f)
	}
}
// domFunc is any dominator-computing function under test.
type domFunc func(f *Func) []*Block

// verifyDominators verifies that the dominators of fut (function under test)
// as determined by domFn, match the map node->dominator. It also checks
// that no blocks outside doms were assigned a dominator.
func verifyDominators(t *testing.T, fut fun, domFn domFunc, doms map[string]string) {
	// Reverse index: *Block -> name, for error messages.
	blockNames := map[*Block]string{}
	for n, b := range fut.blocks {
		blockNames[b] = n
	}

	calcDom := domFn(fut.f)

	for n, d := range doms {
		nblk, ok := fut.blocks[n]
		if !ok {
			t.Errorf("invalid block name %s", n)
			continue // can't check a block that doesn't exist
		}
		dblk, ok := fut.blocks[d]
		if !ok {
			t.Errorf("invalid block name %s", d)
			continue
		}
		domNode := calcDom[nblk.ID]
		if domNode == dblk {
			// Matched: mark as verified so the sweep below skips it.
			calcDom[nblk.ID] = nil
			continue
		}
		t.Errorf("expected %s as dominator of %s, found %s", d, n, blockNames[domNode])
	}

	// Any non-nil entry left in calcDom for a known block is a
	// dominator that doms did not expect.
	for id, d := range calcDom {
		// If nil, we've already verified it
		if d == nil {
			continue
		}
		for _, b := range fut.blocks {
			if int(b.ID) == id {
				t.Errorf("unexpected dominator of %s for %s", blockNames[d], blockNames[b])
			}
		}
	}
}
// TestDominatorsSingleBlock: a lone entry/exit block has no dominators.
func TestDominatorsSingleBlock(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Exit("mem")))
	doms := map[string]string{}
	CheckFunc(fun.f)
	verifyDominators(t, fun, dominators, doms)
	verifyDominators(t, fun, dominatorsSimple, doms)
}
// TestDominatorsSimple: a straight-line chain, where each block is
// dominated by its sole predecessor.
func TestDominatorsSimple(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Goto("a")),
		Bloc("a",
			Goto("b")),
		Bloc("b",
			Goto("c")),
		Bloc("c",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))
	doms := map[string]string{
		"a":    "entry",
		"b":    "a",
		"c":    "b",
		"exit": "c",
	}
	CheckFunc(fun.f)
	verifyDominators(t, fun, dominators, doms)
	verifyDominators(t, fun, dominatorsSimple, doms)
}
// TestDominatorsMultPredFwd: forward branches give c multiple
// predecessors, so c's dominator is entry rather than either branch.
func TestDominatorsMultPredFwd(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 1, nil),
			If("p", "a", "c")),
		Bloc("a",
			If("p", "b", "c")),
		Bloc("b",
			Goto("c")),
		Bloc("c",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))
	doms := map[string]string{
		"a":    "entry",
		"b":    "a",
		"c":    "entry",
		"exit": "c",
	}
	CheckFunc(fun.f)
	verifyDominators(t, fun, dominators, doms)
	verifyDominators(t, fun, dominatorsSimple, doms)
}
// TestDominatorsDeadCode: b4 is unreachable (condition is the constant
// false), so it gets no dominator entry.
func TestDominatorsDeadCode(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 0, nil),
			If("p", "b3", "b5")),
		Bloc("b2", Exit("mem")),
		Bloc("b3", Goto("b2")),
		Bloc("b4", Goto("b2")),
		Bloc("b5", Goto("b2")))
	doms := map[string]string{
		"b2": "entry",
		"b3": "entry",
		"b5": "entry",
	}
	CheckFunc(fun.f)
	verifyDominators(t, fun, dominators, doms)
	verifyDominators(t, fun, dominatorsSimple, doms)
}
// TestDominatorsMultPredRev: back edges (a->first, c->b) create blocks
// with multiple predecessors; dominators still follow the forward chain.
func TestDominatorsMultPredRev(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Goto("first")),
		Bloc("first",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 1, nil),
			Goto("a")),
		Bloc("a",
			If("p", "b", "first")),
		Bloc("b",
			Goto("c")),
		Bloc("c",
			If("p", "exit", "b")),
		Bloc("exit",
			Exit("mem")))
	doms := map[string]string{
		"first": "entry",
		"a":     "first",
		"b":     "a",
		"c":     "b",
		"exit":  "c",
	}
	CheckFunc(fun.f)
	verifyDominators(t, fun, dominators, doms)
	verifyDominators(t, fun, dominatorsSimple, doms)
}
// TestDominatorsMultPred: b and c are each reachable via several paths
// from entry, so entry is their immediate dominator.
func TestDominatorsMultPred(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 1, nil),
			If("p", "a", "c")),
		Bloc("a",
			If("p", "b", "c")),
		Bloc("b",
			Goto("c")),
		Bloc("c",
			If("p", "b", "exit")),
		Bloc("exit",
			Exit("mem")))
	doms := map[string]string{
		"a":    "entry",
		"b":    "entry",
		"c":    "entry",
		"exit": "c",
	}
	CheckFunc(fun.f)
	verifyDominators(t, fun, dominators, doms)
	verifyDominators(t, fun, dominatorsSimple, doms)
}
// TestPostDominators: same CFG as TestDominatorsMultPred, but checked
// against the post-dominator tree (every path to exit goes through c).
func TestPostDominators(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 1, nil),
			If("p", "a", "c")),
		Bloc("a",
			If("p", "b", "c")),
		Bloc("b",
			Goto("c")),
		Bloc("c",
			If("p", "b", "exit")),
		Bloc("exit",
			Exit("mem")))
	doms := map[string]string{"entry": "c",
		"a": "c",
		"b": "c",
		"c": "exit",
	}
	CheckFunc(fun.f)
	verifyDominators(t, fun, postDominators, doms)
}
// TestInfiniteLoop: a CFG with no exit block still has dominators, but
// post-dominators are empty.
func TestInfiniteLoop(t *testing.T) {
	c := testConfig(t)
	// note lack of an exit block
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("p", OpConstBool, TypeBool, 1, nil),
			Goto("a")),
		Bloc("a",
			Goto("b")),
		Bloc("b",
			Goto("a")))
	CheckFunc(fun.f)
	doms := map[string]string{"a": "entry",
		"b": "a"}
	verifyDominators(t, fun, dominators, doms)
	// no exit block, so there are no post-dominators
	postDoms := map[string]string{}
	verifyDominators(t, fun, postDominators, postDoms)
}

View File

@ -0,0 +1,67 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/internal/obj"
"testing"
)
// Exported aliases so external-style tests can reach these internal
// helpers under one spelling.
var CheckFunc = checkFunc
var PrintFunc = printFunc
var Opt = opt
var Deadcode = deadcode
// testConfig returns a Config suitable for tests, reporting through t.
func testConfig(t *testing.T) *Config {
	testCtxt := &obj.Link{}
	return NewConfig("amd64", DummyFrontend{t}, testCtxt, true)
}
// DummyFrontend is a test-only frontend.
// It assumes 64 bit integers and pointers.
type DummyFrontend struct {
	t testing.TB
}
func (DummyFrontend) StringData(s string) interface{} {
	return nil
}
func (DummyFrontend) Auto(t Type) GCNode {
	return nil
}
func (DummyFrontend) Line(line int32) string {
	return "unknown.go:0"
}
func (d DummyFrontend) Logf(msg string, args ...interface{}) { d.t.Logf(msg, args...) }
func (d DummyFrontend) Log() bool { return true }
func (d DummyFrontend) Fatalf(line int32, msg string, args ...interface{}) { d.t.Fatalf(msg, args...) }
func (d DummyFrontend) Unimplementedf(line int32, msg string, args ...interface{}) {
	d.t.Fatalf(msg, args...)
}
// NOTE(review): Warnl takes a plain int line while Fatalf/Unimplementedf
// take int32 — presumably mirroring the frontend interface; confirm.
func (d DummyFrontend) Warnl(line int, msg string, args ...interface{}) { d.t.Logf(msg, args...) }
func (d DummyFrontend) Debug_checknil() bool { return false }
// Type constructors: dummy land is 64-bit, so int/uintptr map to 64-bit types.
func (d DummyFrontend) TypeBool() Type { return TypeBool }
func (d DummyFrontend) TypeInt8() Type { return TypeInt8 }
func (d DummyFrontend) TypeInt16() Type { return TypeInt16 }
func (d DummyFrontend) TypeInt32() Type { return TypeInt32 }
func (d DummyFrontend) TypeInt64() Type { return TypeInt64 }
func (d DummyFrontend) TypeUInt8() Type { return TypeUInt8 }
func (d DummyFrontend) TypeUInt16() Type { return TypeUInt16 }
func (d DummyFrontend) TypeUInt32() Type { return TypeUInt32 }
func (d DummyFrontend) TypeUInt64() Type { return TypeUInt64 }
func (d DummyFrontend) TypeFloat32() Type { return TypeFloat32 }
func (d DummyFrontend) TypeFloat64() Type { return TypeFloat64 }
func (d DummyFrontend) TypeInt() Type { return TypeInt64 }
func (d DummyFrontend) TypeUintptr() Type { return TypeUInt64 }
func (d DummyFrontend) TypeString() Type { panic("unimplemented") }
func (d DummyFrontend) TypeBytePtr() Type { return TypeBytePtr }
func (d DummyFrontend) CanSSA(t Type) bool {
	// There are no un-SSAable types in dummy land.
	return true
}

View File

@ -0,0 +1,131 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// flagRegMask is the register-mask bit for the processor flags register.
const flagRegMask = regMask(1) << 33 // TODO: arch-specific
// flagalloc allocates the flag register among all the flag-generating
// instructions. Flag values are recomputed if they need to be
// spilled/restored.
func flagalloc(f *Func) {
	// Compute the in-register flag value we want at the end of
	// each block. This is basically a best-effort live variable
	// analysis, so it can be much simpler than a full analysis.
	// TODO: do we really need to keep flag values live across blocks?
	// Could we force the flags register to be unused at basic block
	// boundaries? Then we wouldn't need this computation.
	end := make([]*Value, f.NumBlocks())
	// Two backward sweeps propagate wanted-flag information; this is the
	// best-effort part rather than iterating to a fixed point.
	for n := 0; n < 2; n++ {
		// Walk blocks backwards. Poor-man's postorder traversal.
		for i := len(f.Blocks) - 1; i >= 0; i-- {
			b := f.Blocks[i]
			// Walk values backwards to figure out what flag
			// value we want in the flag register at the start
			// of the block.
			flag := end[b.ID]
			if b.Control != nil && b.Control.Type.IsFlags() {
				flag = b.Control
			}
			for j := len(b.Values) - 1; j >= 0; j-- {
				v := b.Values[j]
				if v == flag {
					flag = nil
				}
				if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
					// v destroys the flags register.
					flag = nil
				}
				for _, a := range v.Args {
					if a.Type.IsFlags() {
						flag = a
					}
				}
			}
			if flag != nil {
				// Request flag to be live at the end of every predecessor.
				for _, p := range b.Preds {
					end[p.ID] = flag
				}
			}
		}
	}
	// For blocks which have a flags control value, that's the only value
	// we can leave in the flags register at the end of the block. (There
	// is no place to put a flag regeneration instruction.)
	for _, b := range f.Blocks {
		v := b.Control
		if v != nil && v.Type.IsFlags() && end[b.ID] != v {
			end[b.ID] = nil
		}
	}
	// Add flag recomputations where they are needed.
	// TODO: Remove original instructions if they are never used.
	var oldSched []*Value
	for _, b := range f.Blocks {
		oldSched = append(oldSched[:0], b.Values...)
		b.Values = b.Values[:0]
		// The current live flag value (the pre-flagalloc copy).
		var flag *Value
		if len(b.Preds) > 0 {
			flag = end[b.Preds[0].ID]
			// Note: the following condition depends on the lack of critical edges.
			for _, p := range b.Preds[1:] {
				if end[p.ID] != flag {
					f.Fatalf("live flag in %s's predecessors not consistent", b)
				}
			}
		}
		for _, v := range oldSched {
			if v.Op == OpPhi && v.Type.IsFlags() {
				f.Fatalf("phi of flags not supported: %s", v.LongString())
			}
			// Make sure any flag arg of v is in the flags register.
			// If not, recompute it.
			for i, a := range v.Args {
				if !a.Type.IsFlags() {
					continue
				}
				if a == flag {
					continue
				}
				// Recalculate a
				c := a.copyInto(b)
				// Update v.
				v.SetArg(i, c)
				// Remember the most-recently computed flag value.
				flag = a
			}
			// Issue v.
			b.Values = append(b.Values, v)
			if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
				flag = nil
			}
			if v.Type.IsFlags() {
				flag = v
			}
		}
		if v := b.Control; v != nil && v != flag && v.Type.IsFlags() {
			// Recalculate control value.
			c := v.copyInto(b)
			b.Control = c
			flag = v
		}
		if v := end[b.ID]; v != nil && v != flag {
			// Need to reissue flag generator for use by
			// subsequent blocks.
			_ = v.copyInto(b)
			// Note: this flag generator is not properly linked up
			// with the flag users. This breaks the SSA representation.
			// We could fix up the users with another pass, but for now
			// we'll just leave it. (Regalloc has the same issue for
			// standard regs, and it runs next.)
		}
	}
	// Save live flag state for later.
	for _, b := range f.Blocks {
		b.FlagsLiveAtEnd = end[b.ID] != nil
	}
}

View File

@ -0,0 +1,352 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"fmt"
"math"
)
// A Func represents a Go func declaration (or function literal) and
// its body. This package compiles each Func independently.
// Func storage is recycled through its Config; see (*Func).Free.
type Func struct {
	Config *Config // architecture information
	pass *pass // current pass information (name, options, etc.)
	Name string // e.g. bytes·Compare
	Type Type // type signature of the function.
	StaticData interface{} // associated static data, untouched by the ssa package
	Blocks []*Block // unordered set of all basic blocks (note: not indexable by ID)
	Entry *Block // the entry basic block
	bid idAlloc // block ID allocator
	vid idAlloc // value ID allocator
	scheduled bool // Values in Blocks are in final order
	// when register allocation is done, maps value ids to locations
	RegAlloc []Location
	// map from LocalSlot to set of Values that we want to store in that slot.
	NamedValues map[LocalSlot][]*Value
	// Names is a copy of NamedValues.Keys. We keep a separate list
	// of keys to make iteration order deterministic.
	Names []LocalSlot
	freeValues *Value // free Values linked by argstorage[0]. All other fields except ID are 0/nil.
	freeBlocks *Block // free Blocks linked by succstorage[0]. All other fields except ID are 0/nil.
	constants map[int64][]*Value // constants cache, keyed by constant value; users must check value's Op and Type
}
// NumBlocks returns an integer larger than the id of any Block in the Func.
// It is suitable for sizing slices indexed by block ID.
func (f *Func) NumBlocks() int {
	return f.bid.num()
}
// NumValues returns an integer larger than the id of any Value in the Func.
// It is suitable for sizing slices indexed by value ID.
func (f *Func) NumValues() int {
	return f.vid.num()
}
// newSparseSet returns a sparse set that can store at least up to n integers.
func (f *Func) newSparseSet(n int) *sparseSet {
	// Reuse a cached set from the config if one is big enough.
	for i, cached := range f.Config.scrSparse {
		if cached == nil || cached.cap() < n {
			continue
		}
		f.Config.scrSparse[i] = nil // claim the slot
		cached.clear()
		return cached
	}
	return newSparseSet(n)
}
// retSparseSet returns a sparse set to the config's cache of sparse sets to be reused by f.newSparseSet.
func (f *Func) retSparseSet(ss *sparseSet) {
	// Drop the set into the first empty cache slot, if any.
	for i := range f.Config.scrSparse {
		if f.Config.scrSparse[i] == nil {
			f.Config.scrSparse[i] = ss
			return
		}
	}
	// No free slot: grow the cache.
	f.Config.scrSparse = append(f.Config.scrSparse, ss)
}
// newValue allocates a new Value with the given fields and places it at the end of b.Values.
func (f *Func) newValue(op Op, t Type, b *Block, line int32) *Value {
	var v *Value
	if f.freeValues != nil {
		// Reuse a freed Value; the free list is threaded through
		// argstorage[0].
		v = f.freeValues
		f.freeValues = v.argstorage[0]
		v.argstorage[0] = nil
	} else {
		ID := f.vid.get()
		if int(ID) < len(f.Config.values) {
			// Use the config's preallocated Value arena.
			v = &f.Config.values[ID]
		} else {
			v = &Value{ID: ID}
		}
	}
	v.Op = op
	v.Type = t
	v.Block = b
	v.Line = line
	b.Values = append(b.Values, v)
	return v
}
// logStat writes a string key and int value as a warning in a
// tab-separated format easily handled by spreadsheets or awk.
// file names, lines, and function names are included to provide enough (?)
// context to allow item-by-item comparisons across runs.
// For example:
// awk 'BEGIN {FS="\t"} $3~/TIME/{sum+=$4} END{print "t(ns)=",sum}' t.log
func (f *Func) logStat(key string, args ...interface{}) {
	var value string
	for i := range args {
		value += fmt.Sprintf("\t%v", args[i])
	}
	f.Config.Warnl(int(f.Entry.Line), "\t%s\t%s%s\t%s", f.pass.name, key, value, f.Name)
}
// freeValue frees a value. It must no longer be referenced.
func (f *Func) freeValue(v *Value) {
	if v.Block == nil {
		f.Fatalf("trying to free an already freed value")
	}
	// Zero everything but the ID (which is reused), then push v onto
	// the free list threaded through argstorage[0].
	*v = Value{ID: v.ID}
	v.argstorage[0] = f.freeValues
	f.freeValues = v
}
// NewBlock allocates a new Block of the given kind and places it at the end of f.Blocks.
func (f *Func) NewBlock(kind BlockKind) *Block {
	var b *Block
	if f.freeBlocks != nil {
		// Reuse a freed Block; the free list is threaded through
		// succstorage[0].
		b = f.freeBlocks
		f.freeBlocks = b.succstorage[0]
		b.succstorage[0] = nil
	} else {
		ID := f.bid.get()
		if int(ID) < len(f.Config.blocks) {
			// Use the config's preallocated Block arena.
			b = &f.Config.blocks[ID]
		} else {
			b = &Block{ID: ID}
		}
	}
	b.Kind = kind
	b.Func = f
	// Start the slices on the block's inline storage to avoid
	// allocation for small blocks.
	b.Preds = b.predstorage[:0]
	b.Succs = b.succstorage[:0]
	b.Values = b.valstorage[:0]
	f.Blocks = append(f.Blocks, b)
	return b
}
// freeBlock returns b to f's block free list. b must no longer be referenced.
func (f *Func) freeBlock(b *Block) {
	if b.Func == nil {
		f.Fatalf("trying to free an already freed block")
	}
	// Zero everything but the ID (which is reused), then push b onto
	// the free list threaded through succstorage[0].
	*b = Block{ID: b.ID}
	b.succstorage[0] = f.freeBlocks
	f.freeBlocks = b
}
// NewValue0 returns a new value in the block with no arguments and zero aux values.
func (b *Block) NewValue0(line int32, op Op, t Type) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = 0
	v.Args = v.argstorage[:0]
	return v
}
// NewValue0I returns a new value in the block with no arguments and an auxint value.
func (b *Block) NewValue0I(line int32, op Op, t Type, auxint int64) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = auxint
	v.Args = v.argstorage[:0]
	return v
}
// NewValue0A returns a new value in the block with no arguments and an aux value.
func (b *Block) NewValue0A(line int32, op Op, t Type, aux interface{}) *Value {
	if _, ok := aux.(int64); ok {
		// Disallow int64 aux values. They should be in the auxint field instead.
		// Maybe we want to allow this at some point, but for now we disallow it
		// to prevent errors like using NewValue1A instead of NewValue1I.
		b.Fatalf("aux field has int64 type op=%s type=%s aux=%v", op, t, aux)
	}
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = 0
	v.Aux = aux
	v.Args = v.argstorage[:0]
	return v
}
// NewValue0IA returns a new value in the block with no arguments and both an auxint and aux values.
func (b *Block) NewValue0IA(line int32, op Op, t Type, auxint int64, aux interface{}) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = auxint
	v.Aux = aux
	v.Args = v.argstorage[:0]
	return v
}
// NewValue1 returns a new value in the block with one argument and zero aux values.
func (b *Block) NewValue1(line int32, op Op, t Type, arg *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = 0
	// Single args fit in the value's inline argstorage.
	v.Args = v.argstorage[:1]
	v.argstorage[0] = arg
	return v
}
// NewValue1I returns a new value in the block with one argument and an auxint value.
func (b *Block) NewValue1I(line int32, op Op, t Type, auxint int64, arg *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = auxint
	v.Args = v.argstorage[:1]
	v.argstorage[0] = arg
	return v
}
// NewValue1A returns a new value in the block with one argument and an aux value.
func (b *Block) NewValue1A(line int32, op Op, t Type, aux interface{}, arg *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = 0
	v.Aux = aux
	v.Args = v.argstorage[:1]
	v.argstorage[0] = arg
	return v
}
// NewValue1IA returns a new value in the block with one argument and both an auxint and aux values.
func (b *Block) NewValue1IA(line int32, op Op, t Type, auxint int64, aux interface{}, arg *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = auxint
	v.Aux = aux
	v.Args = v.argstorage[:1]
	v.argstorage[0] = arg
	return v
}
// NewValue2 returns a new value in the block with two arguments and zero aux values.
func (b *Block) NewValue2(line int32, op Op, t Type, arg0, arg1 *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = 0
	// Two args still fit in the value's inline argstorage.
	v.Args = v.argstorage[:2]
	v.argstorage[0] = arg0
	v.argstorage[1] = arg1
	return v
}
// NewValue2I returns a new value in the block with two arguments and an auxint value.
func (b *Block) NewValue2I(line int32, op Op, t Type, auxint int64, arg0, arg1 *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = auxint
	v.Args = v.argstorage[:2]
	v.argstorage[0] = arg0
	v.argstorage[1] = arg1
	return v
}
// NewValue3 returns a new value in the block with three arguments and zero aux values.
func (b *Block) NewValue3(line int32, op Op, t Type, arg0, arg1, arg2 *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = 0
	// Three args don't fit in argstorage; allocate a fresh slice.
	v.Args = []*Value{arg0, arg1, arg2}
	return v
}
// NewValue3I returns a new value in the block with three arguments and an auxint value.
func (b *Block) NewValue3I(line int32, op Op, t Type, auxint int64, arg0, arg1, arg2 *Value) *Value {
	v := b.Func.newValue(op, t, b, line)
	v.AuxInt = auxint
	v.Args = []*Value{arg0, arg1, arg2}
	return v
}
// constVal returns a constant value for c.
// Constants are cached per Func keyed by c; a cached entry is only
// reused when its Op and Type also match. New constants are created
// in the entry block.
func (f *Func) constVal(line int32, op Op, t Type, c int64) *Value {
	if f.constants == nil {
		f.constants = make(map[int64][]*Value)
	}
	vv := f.constants[c]
	for _, v := range vv {
		if v.Op == op && v.Type.Equal(t) {
			return v
		}
	}
	v := f.Entry.NewValue0I(line, op, t, c)
	f.constants[c] = append(vv, v)
	return v
}
// ConstBool returns a bool constant representing its argument
// (stored as 0 or 1 in the value's AuxInt).
func (f *Func) ConstBool(line int32, t Type, c bool) *Value {
	i := int64(0)
	if c {
		i = 1
	}
	return f.constVal(line, OpConstBool, t, i)
}
// ConstInt8 returns an int8 constant representing its argument.
func (f *Func) ConstInt8(line int32, t Type, c int8) *Value {
	return f.constVal(line, OpConst8, t, int64(c))
}
// ConstInt16 returns an int16 constant representing its argument.
func (f *Func) ConstInt16(line int32, t Type, c int16) *Value {
	return f.constVal(line, OpConst16, t, int64(c))
}
// ConstInt32 returns an int32 constant representing its argument.
func (f *Func) ConstInt32(line int32, t Type, c int32) *Value {
	return f.constVal(line, OpConst32, t, int64(c))
}
// ConstInt64 returns an int64 constant representing its argument.
func (f *Func) ConstInt64(line int32, t Type, c int64) *Value {
	return f.constVal(line, OpConst64, t, c)
}
// ConstFloat32 returns a float32 constant; the bit pattern of c is
// stored in AuxInt via math.Float64bits.
func (f *Func) ConstFloat32(line int32, t Type, c float64) *Value {
	return f.constVal(line, OpConst32F, t, int64(math.Float64bits(c)))
}
// ConstFloat64 returns a float64 constant; the bit pattern of c is
// stored in AuxInt via math.Float64bits.
func (f *Func) ConstFloat64(line int32, t Type, c float64) *Value {
	return f.constVal(line, OpConst64F, t, int64(math.Float64bits(c)))
}
// Logging/error-reporting helpers that delegate to the Config; the
// fatal variants report at the entry block's line.
func (f *Func) Logf(msg string, args ...interface{}) { f.Config.Logf(msg, args...) }
func (f *Func) Log() bool { return f.Config.Log() }
func (f *Func) Fatalf(msg string, args ...interface{}) { f.Config.Fatalf(f.Entry.Line, msg, args...) }
func (f *Func) Unimplementedf(msg string, args ...interface{}) {
	f.Config.Unimplementedf(f.Entry.Line, msg, args...)
}
// Free returns f's arena storage to the config for reuse and marks f
// unusable. f must be the most recently allocated func of its Config.
func (f *Func) Free() {
	// Clear values.
	n := f.vid.num()
	if n > len(f.Config.values) {
		n = len(f.Config.values)
	}
	// Reset each arena slot to the zero Value but keep its ID.
	for i := 1; i < n; i++ {
		f.Config.values[i] = Value{}
		f.Config.values[i].ID = ID(i)
	}
	// Clear blocks.
	n = f.bid.num()
	if n > len(f.Config.blocks) {
		n = len(f.Config.blocks)
	}
	for i := 1; i < n; i++ {
		f.Config.blocks[i] = Block{}
		f.Config.blocks[i].ID = ID(i)
	}
	// Unregister from config.
	if f.Config.curFunc != f {
		f.Fatalf("free of function which isn't the last one allocated")
	}
	f.Config.curFunc = nil
	*f = Func{} // just in case
}

View File

@ -0,0 +1,445 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file contains some utility functions to help define Funcs for testing.
// As an example, the following func
//
// b1:
// v1 = InitMem <mem>
// Plain -> b2
// b2:
// Exit v1
// b3:
// v2 = Const <bool> [true]
// If v2 -> b3 b2
//
// can be defined as
//
// fun := Fun("entry",
// Bloc("entry",
// Valu("mem", OpInitMem, TypeMem, 0, nil),
// Goto("exit")),
// Bloc("exit",
// Exit("mem")),
// Bloc("deadblock",
// Valu("deadval", OpConstBool, TypeBool, 0, true),
// If("deadval", "deadblock", "exit")))
//
// and the Blocks or Values used in the Func can be accessed
// like this:
// fun.blocks["entry"] or fun.values["deadval"]
package ssa
// TODO(matloob): Choose better names for Fun, Bloc, Goto, etc.
// TODO(matloob): Write a parser for the Func disassembly. Maybe
// the parser can be used instead of Fun.
import (
"fmt"
"reflect"
"testing"
)
// Compare two Funcs for equivalence. Their CFGs must be isomorphic,
// and their values must correspond.
// Requires that values and predecessors are in the same order, even
// though Funcs could be equivalent when they are not.
// TODO(matloob): Allow values and predecessors to be in different
// orders if the CFG are otherwise equivalent.
func Equiv(f, g *Func) bool {
	// valcor records the bidirectional value correspondence built so far.
	valcor := make(map[*Value]*Value)
	var checkVal func(fv, gv *Value) bool
	checkVal = func(fv, gv *Value) bool {
		if fv == nil && gv == nil {
			return true
		}
		if valcor[fv] == nil && valcor[gv] == nil {
			// First time this pair is seen: record the correspondence,
			// then compare the values structurally.
			valcor[fv] = gv
			valcor[gv] = fv
			// Ignore ids. Ops and Types are compared for equality.
			// TODO(matloob): Make sure types are canonical and can
			// be compared for equality.
			if fv.Op != gv.Op || fv.Type != gv.Type || fv.AuxInt != gv.AuxInt {
				return false
			}
			if !reflect.DeepEqual(fv.Aux, gv.Aux) {
				// This makes the assumption that aux values can be compared
				// using DeepEqual.
				// TODO(matloob): Aux values may be *gc.Sym pointers in the near
				// future. Make sure they are canonical.
				return false
			}
			if len(fv.Args) != len(gv.Args) {
				return false
			}
			for i := range fv.Args {
				if !checkVal(fv.Args[i], gv.Args[i]) {
					return false
				}
			}
		}
		// Equivalent only if the recorded correspondence is consistent
		// in both directions.
		return valcor[fv] == gv && valcor[gv] == fv
	}
	// blkcor records the bidirectional block correspondence.
	blkcor := make(map[*Block]*Block)
	var checkBlk func(fb, gb *Block) bool
	checkBlk = func(fb, gb *Block) bool {
		if blkcor[fb] == nil && blkcor[gb] == nil {
			blkcor[fb] = gb
			blkcor[gb] = fb
			// ignore ids
			if fb.Kind != gb.Kind {
				return false
			}
			if len(fb.Values) != len(gb.Values) {
				return false
			}
			for i := range fb.Values {
				if !checkVal(fb.Values[i], gb.Values[i]) {
					return false
				}
			}
			if len(fb.Succs) != len(gb.Succs) {
				return false
			}
			for i := range fb.Succs {
				if !checkBlk(fb.Succs[i], gb.Succs[i]) {
					return false
				}
			}
			if len(fb.Preds) != len(gb.Preds) {
				return false
			}
			for i := range fb.Preds {
				if !checkBlk(fb.Preds[i], gb.Preds[i]) {
					return false
				}
			}
			return true
		}
		return blkcor[fb] == gb && blkcor[gb] == fb
	}
	// Two funcs are equivalent if their entry blocks correspond.
	return checkBlk(f.Entry, g.Entry)
}
// fun is the return type of Fun. It contains the created func
// itself as well as indexes from block and value names into the
// corresponding Blocks and Values.
type fun struct {
	f      *Func             // the constructed function
	blocks map[string]*Block // bloc name -> constructed Block
	values map[string]*Value // valu name -> constructed Value
}

// emptyPass is installed as f.pass on Funcs built by Fun so that
// helpers which consult the current pass have something valid to use.
var emptyPass pass = pass{
	name: "empty pass",
}
// Fun takes the name of an entry bloc and a series of Bloc calls, and
// returns a fun containing the composed Func. entry must be a name
// supplied to one of the Bloc functions. Each of the bloc names and
// valu names should be unique across the Fun.
func Fun(c *Config, entry string, blocs ...bloc) fun {
	f := c.NewFunc()
	f.pass = &emptyPass
	blocks := make(map[string]*Block)
	values := make(map[string]*Value)
	// First pass: create all the blocks and values so that names can be
	// resolved in any order.
	for _, bloc := range blocs {
		b := f.NewBlock(bloc.control.kind)
		blocks[bloc.name] = b
		for _, valu := range bloc.valus {
			// args are filled in the second pass.
			values[valu.name] = b.NewValue0IA(0, valu.op, valu.t, valu.auxint, valu.aux)
		}
	}
	// Second pass: connect the blocks together and specify control values.
	f.Entry = blocks[entry]
	for _, bloc := range blocs {
		b := blocks[bloc.name]
		c := bloc.control
		// Specify control values.
		if c.control != "" {
			cval, ok := values[c.control]
			if !ok {
				f.Fatalf("control value for block %s missing", bloc.name)
			}
			b.Control = cval
		}
		// Fill in args.
		for _, valu := range bloc.valus {
			v := values[valu.name]
			for _, arg := range valu.args {
				a, ok := values[arg]
				if !ok {
					b.Fatalf("arg %s missing for value %s in block %s",
						arg, valu.name, bloc.name)
				}
				v.AddArg(a)
			}
		}
		// Connect to successors.
		for _, succ := range c.succs {
			b.AddEdgeTo(blocks[succ])
		}
	}
	return fun{f, blocks, values}
}
// Bloc defines a block for Fun. The bloc name should be unique
// across the containing Fun. entries should consist of calls to valu,
// as well as one call to Goto, If, or Exit to specify the block kind.
func Bloc(name string, entries ...interface{}) bloc {
	result := bloc{name: name}
	haveControl := false
	for _, entry := range entries {
		switch e := entry.(type) {
		case ctrl:
			// there should be exactly one Ctrl entry.
			if haveControl {
				panic(fmt.Sprintf("already seen control for block %s", name))
			}
			result.control = e
			haveControl = true
		case valu:
			result.valus = append(result.valus, e)
		}
	}
	if !haveControl {
		panic(fmt.Sprintf("block %s doesn't have control", result.name))
	}
	return result
}
// Valu defines a value in a block.
func Valu(name string, op Op, t Type, auxint int64, aux interface{}, args ...string) valu {
	return valu{
		name:   name,
		op:     op,
		t:      t,
		auxint: auxint,
		aux:    aux,
		args:   args,
	}
}
// Goto specifies that this is a BlockPlain and names the single successor.
// TODO(matloob): choose a better name.
func Goto(succ string) ctrl {
	return ctrl{kind: BlockPlain, control: "", succs: []string{succ}}
}
// If specifies a BlockIf with the given control value and two successors.
func If(cond, sub, alt string) ctrl {
	return ctrl{kind: BlockIf, control: cond, succs: []string{sub, alt}}
}
// Exit specifies a BlockExit whose control is arg; it has no successors.
func Exit(arg string) ctrl {
	return ctrl{kind: BlockExit, control: arg, succs: []string{}}
}
// Eq specifies a BlockAMD64EQ with the given control value and two successors.
func Eq(cond, sub, alt string) ctrl {
	return ctrl{kind: BlockAMD64EQ, control: cond, succs: []string{sub, alt}}
}
// bloc, ctrl, and valu are internal structures used by Bloc, Valu, Goto,
// If, and Exit to help define blocks.

// bloc describes one block: its name, its terminator, and its values.
type bloc struct {
	name    string // unique block name within the Fun
	control ctrl   // block kind, control value name, and successors
	valus   []valu // values defined in this block
}

// ctrl describes a block's kind, its control value (by name, "" if none),
// and its successor block names.
type ctrl struct {
	kind    BlockKind
	control string
	succs   []string
}

// valu describes one value: its name, opcode, type, aux data, and the
// names of its arguments.
type valu struct {
	name   string
	op     Op
	t      Type
	auxint int64
	aux    interface{}
	args   []string
}
// TestArgs checks that Fun wires up value arguments in the order the
// names were supplied to Valu.
func TestArgs(t *testing.T) {
	c := testConfig(t)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("a", OpConst64, TypeInt64, 14, nil),
			Valu("b", OpConst64, TypeInt64, 26, nil),
			Valu("sum", OpAdd64, TypeInt64, 0, nil, "a", "b"),
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))
	sum := fun.values["sum"]
	for i, name := range []string{"a", "b"} {
		if sum.Args[i] != fun.values[name] {
			// The expected value is fun.values[name]; the actual arg is
			// sum.Args[i]. (Previously the want/got operands were swapped.)
			t.Errorf("arg %d for sum is incorrect: want %s, got %s",
				i, fun.values[name], sum.Args[i])
		}
	}
}
// TestEquiv exercises Equiv with pairs of Funcs that are expected to be
// equivalent and pairs that are expected to differ, covering CFG shape,
// value order, auxint, aux, and argument differences.
func TestEquiv(t *testing.T) {
	// Pairs that Equiv must report as equivalent.
	equivalentCases := []struct{ f, g fun }{
		// simple case
		{
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Valu("b", OpConst64, TypeInt64, 26, nil),
					Valu("sum", OpAdd64, TypeInt64, 0, nil, "a", "b"),
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Goto("exit")),
				Bloc("exit",
					Exit("mem"))),
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Valu("b", OpConst64, TypeInt64, 26, nil),
					Valu("sum", OpAdd64, TypeInt64, 0, nil, "a", "b"),
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Goto("exit")),
				Bloc("exit",
					Exit("mem"))),
		},
		// block order changed
		{
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Valu("b", OpConst64, TypeInt64, 26, nil),
					Valu("sum", OpAdd64, TypeInt64, 0, nil, "a", "b"),
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Goto("exit")),
				Bloc("exit",
					Exit("mem"))),
			Fun(testConfig(t), "entry",
				Bloc("exit",
					Exit("mem")),
				Bloc("entry",
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Valu("b", OpConst64, TypeInt64, 26, nil),
					Valu("sum", OpAdd64, TypeInt64, 0, nil, "a", "b"),
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Goto("exit"))),
		},
	}
	for _, c := range equivalentCases {
		if !Equiv(c.f.f, c.g.f) {
			t.Error("expected equivalence. Func definitions:")
			t.Error(c.f.f)
			t.Error(c.g.f)
		}
	}
	// Pairs that Equiv must report as different.
	differentCases := []struct{ f, g fun }{
		// different shape
		{
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Goto("exit")),
				Bloc("exit",
					Exit("mem"))),
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Exit("mem"))),
		},
		// value order changed
		{
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("b", OpConst64, TypeInt64, 26, nil),
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Exit("mem"))),
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Valu("b", OpConst64, TypeInt64, 26, nil),
					Exit("mem"))),
		},
		// value auxint different
		{
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Exit("mem"))),
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("a", OpConst64, TypeInt64, 26, nil),
					Exit("mem"))),
		},
		// value aux different
		{
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("a", OpConst64, TypeInt64, 0, 14),
					Exit("mem"))),
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("a", OpConst64, TypeInt64, 0, 26),
					Exit("mem"))),
		},
		// value args different
		{
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("a", OpConst64, TypeInt64, 14, nil),
					Valu("b", OpConst64, TypeInt64, 26, nil),
					Valu("sum", OpAdd64, TypeInt64, 0, nil, "a", "b"),
					Exit("mem"))),
			Fun(testConfig(t), "entry",
				Bloc("entry",
					Valu("mem", OpInitMem, TypeMem, 0, nil),
					Valu("a", OpConst64, TypeInt64, 0, nil),
					Valu("b", OpConst64, TypeInt64, 14, nil),
					Valu("sum", OpAdd64, TypeInt64, 0, nil, "b", "a"),
					Exit("mem"))),
		},
	}
	for _, c := range differentCases {
		if Equiv(c.f.f, c.g.f) {
			t.Error("expected difference. Func definitions:")
			t.Error(c.f.f)
			t.Error(c.g.f)
		}
	}
}
// opcodeMap returns a map from opcode to the number of times that opcode
// appears in the function.
func opcodeMap(f *Func) map[Op]int {
	counts := make(map[Op]int)
	for _, blk := range f.Blocks {
		for _, val := range blk.Values {
			counts[val.Op]++
		}
	}
	return counts
}
// checkOpcodeCounts checks that each opcode listed in m appears in f the
// expected number of times. Opcodes that appear in f but are absent from
// m are not checked.
func checkOpcodeCounts(t *testing.T, f *Func, m map[Op]int) {
	n := opcodeMap(f)
	for op, cnt := range m {
		if n[op] != cnt {
			t.Errorf("%s appears %d times, want %d times", op, n[op], cnt)
		}
	}
}

View File

@ -0,0 +1,158 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// fuse simplifies control flow by joining basic blocks.
// It repeats the per-block simplifications until a full sweep over the
// function makes no further change.
func fuse(f *Func) {
	for {
		changed := false
		for _, b := range f.Blocks {
			changed = fuseBlockIf(b) || changed
			changed = fuseBlockPlain(b) || changed
		}
		if !changed {
			return
		}
	}
}
// fuseBlockIf handles the following cases where s0 and s1 are empty blocks.
//
//   b        b        b      b
//  / \      | \      / |     |
// s0  s1    |  s1   s0 |     |
//  \ /      | /      \ |     |
//   ss      ss        ss     ss
//
// If all Phi ops in ss have identical variables for slots corresponding to
// s0, s1 and b then the branch can be dropped.
// TODO: If ss doesn't contain any OpPhis, aren't s0 and s1 dead code anyway?
func fuseBlockIf(b *Block) bool {
	if b.Kind != BlockIf {
		return false
	}

	var ss0, ss1 *Block
	s0 := b.Succs[0]
	// A successor only counts as an eliminable intermediate block if it is
	// an empty BlockPlain with b as its sole predecessor; otherwise treat
	// the edge as going directly from b (s0 = b, ss0 = the successor).
	if s0.Kind != BlockPlain || len(s0.Preds) != 1 || len(s0.Values) != 0 {
		s0, ss0 = b, s0
	} else {
		ss0 = s0.Succs[0]
	}
	s1 := b.Succs[1]
	if s1.Kind != BlockPlain || len(s1.Preds) != 1 || len(s1.Values) != 0 {
		s1, ss1 = b, s1
	} else {
		ss1 = s1.Succs[0]
	}
	// Both arms must reach the same join block ss.
	if ss0 != ss1 {
		return false
	}
	ss := ss0

	// s0 and s1 are equal with b if the corresponding block is missing
	// (2nd, 3rd and 4th case in the figure).
	i0, i1 := -1, -1
	for i, p := range ss.Preds {
		if p == s0 {
			i0 = i
		}
		if p == s1 {
			i1 = i
		}
	}
	if i0 == -1 || i1 == -1 {
		b.Fatalf("invalid predecessors")
	}
	// The branch is droppable only if every Phi in ss gets the same value
	// from both incoming slots.
	for _, v := range ss.Values {
		if v.Op == OpPhi && v.Args[i0] != v.Args[i1] {
			return false
		}
	}

	// Now we have two of following b->ss, b->s0->ss and b->s1->ss,
	// with s0 and s1 empty if they exist.
	// We can replace it with b->ss since all OpPhis in ss
	// have identical predecessors (verified above).
	// No critical edge is introduced because b will have one successor.
	if s0 != b && s1 != b {
		ss.removePred(s0)

		// Replace edge b->s1->ss with b->ss.
		// We need to keep a slot for Phis corresponding to b.
		for i := range b.Succs {
			if b.Succs[i] == s1 {
				b.Succs[i] = ss
			}
		}
		for i := range ss.Preds {
			if ss.Preds[i] == s1 {
				ss.Preds[i] = b
			}
		}
	} else if s0 != b {
		ss.removePred(s0)
	} else if s1 != b {
		ss.removePred(s1)
	}
	// b becomes an unconditional jump to ss.
	b.Kind = BlockPlain
	b.Control = nil
	b.Succs = append(b.Succs[:0], ss)

	// Trash the empty blocks s0 & s1.
	if s0 != b {
		s0.Kind = BlockInvalid
		s0.Values = nil
		s0.Succs = nil
		s0.Preds = nil
	}
	if s1 != b {
		s1.Kind = BlockInvalid
		s1.Values = nil
		s1.Succs = nil
		s1.Preds = nil
	}
	return true
}
// fuseBlockPlain merges a BlockPlain b into its single successor c when b
// is c's only predecessor, moving b's values into c and redirecting all of
// b's predecessors to c. Reports whether a change was made.
func fuseBlockPlain(b *Block) bool {
	if b.Kind != BlockPlain {
		return false
	}

	c := b.Succs[0]
	if len(c.Preds) != 1 {
		return false
	}

	// move all of b's values to c.
	for _, v := range b.Values {
		v.Block = c
		c.Values = append(c.Values, v)
	}

	// replace b->c edge with preds(b) -> c
	c.predstorage[0] = nil
	// Reuse c's inline predecessor storage when b's predecessor list fits.
	if len(b.Preds) > len(b.predstorage) {
		c.Preds = b.Preds
	} else {
		c.Preds = append(c.predstorage[:0], b.Preds...)
	}
	for _, p := range c.Preds {
		for i, q := range p.Succs {
			if q == b {
				p.Succs[i] = c
			}
		}
	}
	if f := b.Func; f.Entry == b {
		f.Entry = c
	}

	// trash b, just in case
	b.Kind = BlockInvalid
	b.Values = nil
	b.Preds = nil
	b.Succs = nil
	return true
}

View File

@ -0,0 +1,129 @@
package ssa
import (
"testing"
)
// TestFuseEliminatesOneBranch checks that fuse removes an empty "then"
// block whose only purpose is to jump to the join block.
func TestFuseEliminatesOneBranch(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("nilptr", OpConstNil, ptrType, 0, nil),
			Valu("bool1", OpNeqPtr, TypeBool, 0, nil, "ptr1", "nilptr"),
			If("bool1", "then", "exit")),
		Bloc("then",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	fuse(fun.f)

	for _, blk := range fun.f.Blocks {
		if blk != fun.blocks["then"] {
			continue
		}
		if blk.Kind != BlockInvalid {
			t.Errorf("then was not eliminated, but should have")
		}
	}
}
// TestFuseEliminatesBothBranches checks that fuse removes both empty
// branch blocks of a diamond whose arms jump straight to the join block.
func TestFuseEliminatesBothBranches(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("nilptr", OpConstNil, ptrType, 0, nil),
			Valu("bool1", OpNeqPtr, TypeBool, 0, nil, "ptr1", "nilptr"),
			If("bool1", "then", "else")),
		Bloc("then",
			Goto("exit")),
		Bloc("else",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	fuse(fun.f)

	for _, b := range fun.f.Blocks {
		if b == fun.blocks["then"] && b.Kind != BlockInvalid {
			t.Errorf("then was not eliminated, but should have")
		}
		if b == fun.blocks["else"] && b.Kind != BlockInvalid {
			// Fixed copy-paste bug: this message previously said "then".
			t.Errorf("else was not eliminated, but should have")
		}
	}
}
// TestFuseHandlesPhis checks that fuse still eliminates empty branch
// blocks when the join block contains a Phi whose incoming values are
// identical on both arms.
func TestFuseHandlesPhis(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("nilptr", OpConstNil, ptrType, 0, nil),
			Valu("bool1", OpNeqPtr, TypeBool, 0, nil, "ptr1", "nilptr"),
			If("bool1", "then", "else")),
		Bloc("then",
			Goto("exit")),
		Bloc("else",
			Goto("exit")),
		Bloc("exit",
			Valu("phi", OpPhi, ptrType, 0, nil, "ptr1", "ptr1"),
			Exit("mem")))

	CheckFunc(fun.f)
	fuse(fun.f)

	for _, b := range fun.f.Blocks {
		if b == fun.blocks["then"] && b.Kind != BlockInvalid {
			t.Errorf("then was not eliminated, but should have")
		}
		if b == fun.blocks["else"] && b.Kind != BlockInvalid {
			// Fixed copy-paste bug: this message previously said "then".
			t.Errorf("else was not eliminated, but should have")
		}
	}
}
// TestFuseEliminatesEmptyBlocks checks that fuse collapses a chain of
// empty plain blocks (all named "z*") into nothing.
func TestFuseEliminatesEmptyBlocks(t *testing.T) {
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("z0")),
		Bloc("z1",
			Goto("z2")),
		Bloc("z3",
			Goto("exit")),
		Bloc("z2",
			Goto("z3")),
		Bloc("z0",
			Goto("z1")),
		Bloc("exit",
			Exit("mem"),
		))

	CheckFunc(fun.f)
	fuse(fun.f)

	for name, blk := range fun.blocks {
		if name[:1] != "z" {
			continue
		}
		if blk.Kind != BlockInvalid {
			t.Errorf("%s was not eliminated, but should have", name)
		}
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,535 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import "strings"
// copied from ../../amd64/reg.go
// regNamesAMD64 lists the register names in their numeric encoding order.
// Each name carries a leading '.'; init below strips it when building the
// name-to-number map, so the index of an entry is its register number.
var regNamesAMD64 = []string{
	".AX",
	".CX",
	".DX",
	".BX",
	".SP",
	".BP",
	".SI",
	".DI",
	".R8",
	".R9",
	".R10",
	".R11",
	".R12",
	".R13",
	".R14",
	".R15",
	".X0",
	".X1",
	".X2",
	".X3",
	".X4",
	".X5",
	".X6",
	".X7",
	".X8",
	".X9",
	".X10",
	".X11",
	".X12",
	".X13",
	".X14",
	".X15",

	// pseudo-registers
	".SB",
	".FLAGS",
}
func init() {
// Make map from reg names to reg integers.
if len(regNamesAMD64) > 64 {
panic("too many registers")
}
num := map[string]int{}
for i, name := range regNamesAMD64 {
if name[0] != '.' {
panic("register name " + name + " does not start with '.'")
}
num[name[1:]] = i
}
buildReg := func(s string) regMask {
m := regMask(0)
for _, r := range strings.Split(s, " ") {
if n, ok := num[r]; ok {
m |= regMask(1) << uint(n)
continue
}
panic("register " + r + " not found")
}
return m
}
// Common individual register masks
var (
ax = buildReg("AX")
cx = buildReg("CX")
dx = buildReg("DX")
x15 = buildReg("X15")
gp = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15")
fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15")
gpsp = gp | buildReg("SP")
gpspsb = gpsp | buildReg("SB")
flags = buildReg("FLAGS")
callerSave = gp | fp | flags
)
// Common slices of register masks
var (
gponly = []regMask{gp}
fponly = []regMask{fp}
flagsonly = []regMask{flags}
)
// Common regInfo
var (
gp01 = regInfo{inputs: []regMask{}, outputs: gponly}
gp11 = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags}
gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered
gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
gp21 = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: gponly, clobbers: flags}
gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags}
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
clobbers: dx | flags}
gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
clobbers: ax | flags}
gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
clobbers: ax | flags}
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
readflags = regInfo{inputs: flagsonly, outputs: gponly}
flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
fp01 = regInfo{inputs: []regMask{}, outputs: fponly}
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
fp21x15 = regInfo{inputs: []regMask{fp &^ x15, fp &^ x15},
clobbers: x15, outputs: []regMask{fp &^ x15}}
fpgp = regInfo{inputs: fponly, outputs: gponly}
gpfp = regInfo{inputs: gponly, outputs: fponly}
fp11 = regInfo{inputs: fponly, outputs: fponly}
fp2flags = regInfo{inputs: []regMask{fp, fp}, outputs: flagsonly}
// fp1flags = regInfo{inputs: fponly, outputs: flagsonly}
fpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly}
fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly}
fpstore = regInfo{inputs: []regMask{gpspsb, fp, 0}}
fpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, fp, 0}}
)
// TODO: most ops clobber flags
// Suffixes encode the bit width of various instructions.
// Q = 64 bit, L = 32 bit, W = 16 bit, B = 8 bit
// TODO: 2-address instructions. Mark ops as needing matching input/output regs.
var AMD64ops = []opData{
// fp ops
{name: "ADDSS", argLength: 2, reg: fp21, asm: "ADDSS"}, // fp32 add
{name: "ADDSD", argLength: 2, reg: fp21, asm: "ADDSD"}, // fp64 add
{name: "SUBSS", argLength: 2, reg: fp21x15, asm: "SUBSS"}, // fp32 sub
{name: "SUBSD", argLength: 2, reg: fp21x15, asm: "SUBSD"}, // fp64 sub
{name: "MULSS", argLength: 2, reg: fp21, asm: "MULSS"}, // fp32 mul
{name: "MULSD", argLength: 2, reg: fp21, asm: "MULSD"}, // fp64 mul
{name: "DIVSS", argLength: 2, reg: fp21x15, asm: "DIVSS"}, // fp32 div
{name: "DIVSD", argLength: 2, reg: fp21x15, asm: "DIVSD"}, // fp64 div
{name: "MOVSSload", argLength: 2, reg: fpload, asm: "MOVSS", aux: "SymOff"}, // fp32 load
{name: "MOVSDload", argLength: 2, reg: fpload, asm: "MOVSD", aux: "SymOff"}, // fp64 load
{name: "MOVSSconst", reg: fp01, asm: "MOVSS", aux: "Float", rematerializeable: true}, // fp32 constant
{name: "MOVSDconst", reg: fp01, asm: "MOVSD", aux: "Float", rematerializeable: true}, // fp64 constant
{name: "MOVSSloadidx4", argLength: 3, reg: fploadidx, asm: "MOVSS", aux: "SymOff"}, // fp32 load
{name: "MOVSDloadidx8", argLength: 3, reg: fploadidx, asm: "MOVSD", aux: "SymOff"}, // fp64 load
{name: "MOVSSstore", argLength: 3, reg: fpstore, asm: "MOVSS", aux: "SymOff"}, // fp32 store
{name: "MOVSDstore", argLength: 3, reg: fpstore, asm: "MOVSD", aux: "SymOff"}, // fp64 store
{name: "MOVSSstoreidx4", argLength: 4, reg: fpstoreidx, asm: "MOVSS", aux: "SymOff"}, // fp32 indexed by 4i store
{name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store
// binary ops
{name: "ADDQ", argLength: 2, reg: gp21, asm: "ADDQ"}, // arg0 + arg1
{name: "ADDL", argLength: 2, reg: gp21, asm: "ADDL"}, // arg0 + arg1
{name: "ADDW", argLength: 2, reg: gp21, asm: "ADDL"}, // arg0 + arg1
{name: "ADDB", argLength: 2, reg: gp21, asm: "ADDL"}, // arg0 + arg1
{name: "ADDQconst", argLength: 1, reg: gp11, asm: "ADDQ", aux: "Int64", typ: "UInt64"}, // arg0 + auxint
{name: "ADDLconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int32"}, // arg0 + auxint
{name: "ADDWconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int16"}, // arg0 + auxint
{name: "ADDBconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int8"}, // arg0 + auxint
{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ"}, // arg0 - arg1
{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL"}, // arg0 - arg1
{name: "SUBW", argLength: 2, reg: gp21, asm: "SUBL"}, // arg0 - arg1
{name: "SUBB", argLength: 2, reg: gp21, asm: "SUBL"}, // arg0 - arg1
{name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int64"}, // arg0 - auxint
{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32"}, // arg0 - auxint
{name: "SUBWconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int16"}, // arg0 - auxint
{name: "SUBBconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int8"}, // arg0 - auxint
{name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ"}, // arg0 * arg1
{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL"}, // arg0 * arg1
{name: "MULW", argLength: 2, reg: gp21, asm: "IMULW"}, // arg0 * arg1
{name: "MULB", argLength: 2, reg: gp21, asm: "IMULW"}, // arg0 * arg1
{name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64"}, // arg0 * auxint
{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32"}, // arg0 * auxint
{name: "MULWconst", argLength: 1, reg: gp11, asm: "IMULW", aux: "Int16"}, // arg0 * auxint
{name: "MULBconst", argLength: 1, reg: gp11, asm: "IMULW", aux: "Int8"}, // arg0 * auxint
{name: "HMULQ", argLength: 2, reg: gp11hmul, asm: "IMULQ"}, // (arg0 * arg1) >> width
{name: "HMULL", argLength: 2, reg: gp11hmul, asm: "IMULL"}, // (arg0 * arg1) >> width
{name: "HMULW", argLength: 2, reg: gp11hmul, asm: "IMULW"}, // (arg0 * arg1) >> width
{name: "HMULB", argLength: 2, reg: gp11hmul, asm: "IMULB"}, // (arg0 * arg1) >> width
{name: "HMULQU", argLength: 2, reg: gp11hmul, asm: "MULQ"}, // (arg0 * arg1) >> width
{name: "HMULLU", argLength: 2, reg: gp11hmul, asm: "MULL"}, // (arg0 * arg1) >> width
{name: "HMULWU", argLength: 2, reg: gp11hmul, asm: "MULW"}, // (arg0 * arg1) >> width
{name: "HMULBU", argLength: 2, reg: gp11hmul, asm: "MULB"}, // (arg0 * arg1) >> width
{name: "AVGQU", argLength: 2, reg: gp21}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits
{name: "DIVQ", argLength: 2, reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
{name: "DIVQU", argLength: 2, reg: gp11div, asm: "DIVQ"}, // arg0 / arg1
{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL"}, // arg0 / arg1
{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW"}, // arg0 / arg1
{name: "MODQ", argLength: 2, reg: gp11mod, asm: "IDIVQ"}, // arg0 % arg1
{name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL"}, // arg0 % arg1
{name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW"}, // arg0 % arg1
{name: "MODQU", argLength: 2, reg: gp11mod, asm: "DIVQ"}, // arg0 % arg1
{name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL"}, // arg0 % arg1
{name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW"}, // arg0 % arg1
{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ"}, // arg0 & arg1
{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL"}, // arg0 & arg1
{name: "ANDW", argLength: 2, reg: gp21, asm: "ANDL"}, // arg0 & arg1
{name: "ANDB", argLength: 2, reg: gp21, asm: "ANDL"}, // arg0 & arg1
{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int64"}, // arg0 & auxint
{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32"}, // arg0 & auxint
{name: "ANDWconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int16"}, // arg0 & auxint
{name: "ANDBconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int8"}, // arg0 & auxint
{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ"}, // arg0 | arg1
{name: "ORL", argLength: 2, reg: gp21, asm: "ORL"}, // arg0 | arg1
{name: "ORW", argLength: 2, reg: gp21, asm: "ORL"}, // arg0 | arg1
{name: "ORB", argLength: 2, reg: gp21, asm: "ORL"}, // arg0 | arg1
{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int64"}, // arg0 | auxint
{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32"}, // arg0 | auxint
{name: "ORWconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int16"}, // arg0 | auxint
{name: "ORBconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int8"}, // arg0 | auxint
{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ"}, // arg0 ^ arg1
{name: "XORL", argLength: 2, reg: gp21, asm: "XORL"}, // arg0 ^ arg1
{name: "XORW", argLength: 2, reg: gp21, asm: "XORL"}, // arg0 ^ arg1
{name: "XORB", argLength: 2, reg: gp21, asm: "XORL"}, // arg0 ^ arg1
{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int64"}, // arg0 ^ auxint
{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32"}, // arg0 ^ auxint
{name: "XORWconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int16"}, // arg0 ^ auxint
{name: "XORBconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int8"}, // arg0 ^ auxint
{name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"}, // arg0 compare to arg1
{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, // arg0 compare to arg1
{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1
{name: "CMPB", argLength: 2, reg: gp2flags, asm: "CMPB", typ: "Flags"}, // arg0 compare to arg1
{name: "CMPQconst", argLength: 1, reg: gp1flags, asm: "CMPQ", typ: "Flags", aux: "Int64"}, // arg0 compare to auxint
{name: "CMPLconst", argLength: 1, reg: gp1flags, asm: "CMPL", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint
{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int16"}, // arg0 compare to auxint
{name: "CMPBconst", argLength: 1, reg: gp1flags, asm: "CMPB", typ: "Flags", aux: "Int8"}, // arg0 compare to auxint
{name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, // arg0 compare to arg1, f32
{name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // arg0 compare to arg1, f64
{name: "TESTQ", argLength: 2, reg: gp2flags, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTL", argLength: 2, reg: gp2flags, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTW", argLength: 2, reg: gp2flags, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTB", argLength: 2, reg: gp2flags, asm: "TESTB", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTQconst", argLength: 1, reg: gp1flags, asm: "TESTQ", typ: "Flags", aux: "Int64"}, // (arg0 & auxint) compare to 0
{name: "TESTLconst", argLength: 1, reg: gp1flags, asm: "TESTL", typ: "Flags", aux: "Int32"}, // (arg0 & auxint) compare to 0
{name: "TESTWconst", argLength: 1, reg: gp1flags, asm: "TESTW", typ: "Flags", aux: "Int16"}, // (arg0 & auxint) compare to 0
{name: "TESTBconst", argLength: 1, reg: gp1flags, asm: "TESTB", typ: "Flags", aux: "Int8"}, // (arg0 & auxint) compare to 0
{name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ"}, // arg0 << arg1, shift amount is mod 64
{name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL"}, // arg0 << arg1, shift amount is mod 32
{name: "SHLW", argLength: 2, reg: gp21shift, asm: "SHLL"}, // arg0 << arg1, shift amount is mod 32
{name: "SHLB", argLength: 2, reg: gp21shift, asm: "SHLL"}, // arg0 << arg1, shift amount is mod 32
{name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64"}, // arg0 << auxint, shift amount 0-63
{name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32"}, // arg0 << auxint, shift amount 0-31
{name: "SHLWconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int16"}, // arg0 << auxint, shift amount 0-31
{name: "SHLBconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int8"}, // arg0 << auxint, shift amount 0-31
// Note: x86 is weird, the 16 and 8 byte shifts still use all 5 bits of shift amount!
{name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ"}, // unsigned arg0 >> arg1, shift amount is mod 64
{name: "SHRL", argLength: 2, reg: gp21shift, asm: "SHRL"}, // unsigned arg0 >> arg1, shift amount is mod 32
{name: "SHRW", argLength: 2, reg: gp21shift, asm: "SHRW"}, // unsigned arg0 >> arg1, shift amount is mod 32
{name: "SHRB", argLength: 2, reg: gp21shift, asm: "SHRB"}, // unsigned arg0 >> arg1, shift amount is mod 32
{name: "SHRQconst", argLength: 1, reg: gp11, asm: "SHRQ", aux: "Int64"}, // unsigned arg0 >> auxint, shift amount 0-63
{name: "SHRLconst", argLength: 1, reg: gp11, asm: "SHRL", aux: "Int32"}, // unsigned arg0 >> auxint, shift amount 0-31
{name: "SHRWconst", argLength: 1, reg: gp11, asm: "SHRW", aux: "Int16"}, // unsigned arg0 >> auxint, shift amount 0-31
{name: "SHRBconst", argLength: 1, reg: gp11, asm: "SHRB", aux: "Int8"}, // unsigned arg0 >> auxint, shift amount 0-31
{name: "SARQ", argLength: 2, reg: gp21shift, asm: "SARQ"}, // signed arg0 >> arg1, shift amount is mod 64
{name: "SARL", argLength: 2, reg: gp21shift, asm: "SARL"}, // signed arg0 >> arg1, shift amount is mod 32
{name: "SARW", argLength: 2, reg: gp21shift, asm: "SARW"}, // signed arg0 >> arg1, shift amount is mod 32
{name: "SARB", argLength: 2, reg: gp21shift, asm: "SARB"}, // signed arg0 >> arg1, shift amount is mod 32
{name: "SARQconst", argLength: 1, reg: gp11, asm: "SARQ", aux: "Int64"}, // signed arg0 >> auxint, shift amount 0-63
{name: "SARLconst", argLength: 1, reg: gp11, asm: "SARL", aux: "Int32"}, // signed arg0 >> auxint, shift amount 0-31
{name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int16"}, // signed arg0 >> auxint, shift amount 0-31
{name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8"}, // signed arg0 >> auxint, shift amount 0-31
{name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int64"}, // arg0 rotate left auxint, rotate amount 0-63
{name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int32"}, // arg0 rotate left auxint, rotate amount 0-31
{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16"}, // arg0 rotate left auxint, rotate amount 0-15
{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-7
// unary ops
{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ"}, // -arg0
{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL"}, // -arg0
{name: "NEGW", argLength: 1, reg: gp11, asm: "NEGL"}, // -arg0
{name: "NEGB", argLength: 1, reg: gp11, asm: "NEGL"}, // -arg0
{name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ"}, // ^arg0
{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL"}, // ^arg0
{name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL"}, // ^arg0
{name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL"}, // ^arg0
{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear.
// Note: SBBW and SBBB are subsumed by SBBL
{name: "SETEQ", argLength: 1, reg: readflags, asm: "SETEQ"}, // extract == condition from arg0
{name: "SETNE", argLength: 1, reg: readflags, asm: "SETNE"}, // extract != condition from arg0
{name: "SETL", argLength: 1, reg: readflags, asm: "SETLT"}, // extract signed < condition from arg0
{name: "SETLE", argLength: 1, reg: readflags, asm: "SETLE"}, // extract signed <= condition from arg0
{name: "SETG", argLength: 1, reg: readflags, asm: "SETGT"}, // extract signed > condition from arg0
{name: "SETGE", argLength: 1, reg: readflags, asm: "SETGE"}, // extract signed >= condition from arg0
{name: "SETB", argLength: 1, reg: readflags, asm: "SETCS"}, // extract unsigned < condition from arg0
{name: "SETBE", argLength: 1, reg: readflags, asm: "SETLS"}, // extract unsigned <= condition from arg0
{name: "SETA", argLength: 1, reg: readflags, asm: "SETHI"}, // extract unsigned > condition from arg0
{name: "SETAE", argLength: 1, reg: readflags, asm: "SETCC"}, // extract unsigned >= condition from arg0
// Need different opcodes for floating point conditions because
// any comparison involving a NaN is always FALSE and thus
// the patterns for inverting conditions cannot be used.
{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ"}, // extract == condition from arg0
{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE"}, // extract != condition from arg0
{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0
{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0
{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"}, // extract floating > condition from arg0
{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0
{name: "MOVBQSX", argLength: 1, reg: gp11nf, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64
{name: "MOVBQZX", argLength: 1, reg: gp11nf, asm: "MOVBQZX"}, // zero extend arg0 from int8 to int64
{name: "MOVWQSX", argLength: 1, reg: gp11nf, asm: "MOVWQSX"}, // sign extend arg0 from int16 to int64
{name: "MOVWQZX", argLength: 1, reg: gp11nf, asm: "MOVWQZX"}, // zero extend arg0 from int16 to int64
{name: "MOVLQSX", argLength: 1, reg: gp11nf, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64
{name: "MOVLQZX", argLength: 1, reg: gp11nf, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64
{name: "MOVBconst", reg: gp01, asm: "MOVB", typ: "UInt8", aux: "Int8", rematerializeable: true}, // 8 low bits of auxint
{name: "MOVWconst", reg: gp01, asm: "MOVW", typ: "UInt16", aux: "Int16", rematerializeable: true}, // 16 low bits of auxint
{name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint
{name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
{name: "CVTTSD2SL", argLength: 1, reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32
{name: "CVTTSD2SQ", argLength: 1, reg: fpgp, asm: "CVTTSD2SQ"}, // convert float64 to int64
{name: "CVTTSS2SL", argLength: 1, reg: fpgp, asm: "CVTTSS2SL"}, // convert float32 to int32
{name: "CVTTSS2SQ", argLength: 1, reg: fpgp, asm: "CVTTSS2SQ"}, // convert float32 to int64
{name: "CVTSL2SS", argLength: 1, reg: gpfp, asm: "CVTSL2SS"}, // convert int32 to float32
{name: "CVTSL2SD", argLength: 1, reg: gpfp, asm: "CVTSL2SD"}, // convert int32 to float64
{name: "CVTSQ2SS", argLength: 1, reg: gpfp, asm: "CVTSQ2SS"}, // convert int64 to float32
{name: "CVTSQ2SD", argLength: 1, reg: gpfp, asm: "CVTSQ2SD"}, // convert int64 to float64
{name: "CVTSD2SS", argLength: 1, reg: fp11, asm: "CVTSD2SS"}, // convert float64 to float32
{name: "CVTSS2SD", argLength: 1, reg: fp11, asm: "CVTSS2SD"}, // convert float32 to float64
{name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR"}, // exclusive or, applied to X regs for float negation.
{name: "LEAQ", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux
{name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + arg1 + auxint + aux
{name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 2*arg1 + auxint + aux
{name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 4*arg1 + auxint + aux
{name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 8*arg1 + auxint + aux
// Note: LEAQ{1,2,4,8} must not have OpSB as either argument.
// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8"}, // load byte from arg0+auxint+aux. arg1=mem
{name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff"}, // ditto, extend to int64
{name: "MOVBQZXload", argLength: 2, reg: gpload, asm: "MOVBQZX", aux: "SymOff"}, // ditto, extend to uint64
{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16"}, // load 2 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVWQSXload", argLength: 2, reg: gpload, asm: "MOVWQSX", aux: "SymOff"}, // ditto, extend to int64
{name: "MOVWQZXload", argLength: 2, reg: gpload, asm: "MOVWQZX", aux: "SymOff"}, // ditto, extend to uint64
{name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32"}, // load 4 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVLQSXload", argLength: 2, reg: gpload, asm: "MOVLQSX", aux: "SymOff"}, // ditto, extend to int64
{name: "MOVLQZXload", argLength: 2, reg: gpload, asm: "MOVLQZX", aux: "SymOff"}, // ditto, extend to uint64
{name: "MOVQload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff", typ: "UInt64"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVQstore", argLength: 3, reg: gpstore, asm: "MOVQ", aux: "SymOff", typ: "Mem"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
// indexed loads/stores
{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
{name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff"}, // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
{name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff"}, // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
// TODO: sign-extending indexed loads
{name: "MOVBstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVB", aux: "SymOff"}, // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
{name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
{name: "MOVQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVQ", aux: "SymOff"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
// TODO: add size-mismatched indexed loads, like MOVBstoreidx4.
// For storeconst ops, the AuxInt field encodes both
// the value to store and an address offset of the store.
// Cast AuxInt to a ValAndOff to extract Val and Off fields.
{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux. arg1=mem
{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ...
{name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ...
{name: "MOVQstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem"}, // store 8 bytes of ...
{name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVB", aux: "SymValAndOff", typ: "Mem"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+1*arg1+ValAndOff(AuxInt).Off()+aux. arg2=mem
{name: "MOVWstoreconstidx2", argLength: 3, reg: gpstoreconstidx, asm: "MOVW", aux: "SymValAndOff", typ: "Mem"}, // store low 2 bytes of ... 2*arg1 ...
{name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... 4*arg1 ...
{name: "MOVQstoreconstidx8", argLength: 3, reg: gpstoreconstidx, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem"}, // store 8 bytes of ... 8*arg1 ...
// arg0 = (duff-adjusted) pointer to start of memory to zero
// arg1 = value to store (will always be zero)
// arg2 = mem
// auxint = offset into duffzero code to start executing
// returns mem
{
name: "DUFFZERO",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("X0")},
clobbers: buildReg("DI FLAGS"),
},
},
{name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", rematerializeable: true},
// arg0 = address of memory to zero
// arg1 = # of 8-byte words to zero
// arg2 = value to store (will always be zero)
// arg3 = mem
// returns mem
{
name: "REPSTOSQ",
argLength: 4,
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("CX"), buildReg("AX")},
clobbers: buildReg("DI CX FLAGS"),
},
},
{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"}, // call static function aux.(*gc.Sym). arg0=mem, auxint=argsize, returns mem
{name: "CALLclosure", argLength: 3, reg: regInfo{[]regMask{gpsp, buildReg("DX"), 0}, callerSave, nil}, aux: "Int64"}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
{name: "CALLdefer", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64"}, // call deferproc. arg0=mem, auxint=argsize, returns mem
{name: "CALLgo", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "Int64"}, // call newproc. arg0=mem, auxint=argsize, returns mem
{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "Int64"}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
// arg0 = destination pointer
// arg1 = source pointer
// arg2 = mem
// auxint = offset from duffcopy symbol to call
// returns memory
{
name: "DUFFCOPY",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("SI")},
clobbers: buildReg("DI SI X0 FLAGS"), // uses X0 as a temporary
},
},
// arg0 = destination pointer
// arg1 = source pointer
// arg2 = # of 8-byte words to copy
// arg3 = mem
// returns memory
{
name: "REPMOVSQ",
argLength: 4,
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("SI"), buildReg("CX")},
clobbers: buildReg("DI SI CX"),
},
},
// (InvertFlags (CMPQ a b)) == (CMPQ b a)
// So if we want (SETL (CMPQ a b)) but we can't do that because a is a constant,
// then we do (SETL (InvertFlags (CMPQ b a))) instead.
// Rewrites will convert this to (SETG (CMPQ b a)).
// InvertFlags is a pseudo-op which can't appear in assembly output.
{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
// Pseudo-ops
{name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem
// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
// and sorts it to the very beginning of the block to prevent other
// use of DX (the closure pointer)
{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("DX")}}},
//arg0=ptr,arg1=mem, returns void. Faults if ptr is nil.
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}, clobbers: flags}},
// MOVQconvert converts between pointers and integers.
// We have a special op for this so as to not confuse GC
// (particularly stack maps). It takes a memory arg so it
// gets correctly ordered with respect to GC safepoints.
// arg0=ptr/int arg1=mem, output=int/ptr
{name: "MOVQconvert", argLength: 2, reg: gp11nf, asm: "MOVQ"},
// Constant flag values. For any comparison, there are 5 possible
// outcomes: the three from the signed total order (<,==,>) and the
// three from the unsigned total order. The == cases overlap.
// Note: there's a sixth "unordered" outcome for floating-point
// comparisons, but we don't use such a beast yet.
// These ops are for temporary use by rewrite rules. They
// cannot appear in the generated assembly.
// Naming convention: Flag<signed outcome>_<unsigned outcome>.
{name: "FlagEQ"},     // equal
{name: "FlagLT_ULT"}, // signed < and unsigned <
{name: "FlagLT_UGT"}, // signed < and unsigned >
{name: "FlagGT_UGT"}, // signed > and unsigned >
{name: "FlagGT_ULT"}, // signed > and unsigned <
}
// AMD64blocks lists the AMD64-specific block-ending kinds: conditional
// branches named for the flags condition they test.
var AMD64blocks = []blockData{
{name: "EQ"},  // equal
{name: "NE"},  // not equal
{name: "LT"},  // signed <
{name: "LE"},  // signed <=
{name: "GT"},  // signed >
{name: "GE"},  // signed >=
{name: "ULT"}, // unsigned <
{name: "ULE"}, // unsigned <=
{name: "UGT"}, // unsigned >
{name: "UGE"}, // unsigned >=
{name: "EQF"}, // FP == (presumably must also account for parity; see ORD/NAN below — confirm in codegen)
{name: "NEF"}, // FP != (same caveat as EQF)
{name: "ORD"}, // FP, ordered comparison (parity zero)
{name: "NAN"}, // FP, unordered comparison (parity one)
}
archs = append(archs, arch{"AMD64", AMD64ops, AMD64blocks, regNamesAMD64})
}

View File

@ -0,0 +1,7 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
This package generates opcode tables, rewrite rules, etc. for the ssa compiler.
Run it with:
go run *.go

View File

@ -0,0 +1,740 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// values are specified using the following format:
// (op <type> [auxint] {aux} arg0 arg1 ...)
// the type and aux fields are optional
// on the matching side
// - the type, aux, and auxint fields must match if they are specified.
// on the generated side
// - the type of the top-level expression is the same as the one on the left-hand side.
// - the type of any subexpressions must be specified explicitly.
// - auxint will be 0 if not specified.
// - aux will be nil if not specified.
// blocks are specified using the following format:
// (kind controlvalue succ0 succ1 ...)
// controlvalue must be "nil" or a value expression
// succ* fields must be variables
// For now, the generated successors must be a permutation of the matched successors.
// constant folding
(Trunc16to8 (Const16 [c])) -> (Const8 [int64(int8(c))])
(Trunc32to8 (Const32 [c])) -> (Const8 [int64(int8(c))])
(Trunc32to16 (Const32 [c])) -> (Const16 [int64(int16(c))])
(Trunc64to8 (Const64 [c])) -> (Const8 [int64(int8(c))])
(Trunc64to16 (Const64 [c])) -> (Const16 [int64(int16(c))])
(Trunc64to32 (Const64 [c])) -> (Const32 [int64(int32(c))])
(Neg8 (Const8 [c])) -> (Const8 [-c])
(Neg16 (Const16 [c])) -> (Const16 [-c])
(Neg32 (Const32 [c])) -> (Const32 [-c])
(Neg64 (Const64 [c])) -> (Const64 [-c])
(Add8 (Const8 [c]) (Const8 [d])) -> (Const8 [c+d])
(Add16 (Const16 [c]) (Const16 [d])) -> (Const16 [c+d])
(Add32 (Const32 [c]) (Const32 [d])) -> (Const32 [c+d])
(Add64 (Const64 [c]) (Const64 [d])) -> (Const64 [c+d])
(Sub8 (Const8 [c]) (Const8 [d])) -> (Const8 [c-d])
(Sub16 (Const16 [c]) (Const16 [d])) -> (Const16 [c-d])
(Sub32 (Const32 [c]) (Const32 [d])) -> (Const32 [c-d])
(Sub64 (Const64 [c]) (Const64 [d])) -> (Const64 [c-d])
(Mul8 (Const8 [c]) (Const8 [d])) -> (Const8 [c*d])
(Mul16 (Const16 [c]) (Const16 [d])) -> (Const16 [c*d])
(Mul32 (Const32 [c]) (Const32 [d])) -> (Const32 [c*d])
(Mul64 (Const64 [c]) (Const64 [d])) -> (Const64 [c*d])
(Lsh64x64 (Const64 [c]) (Const64 [d])) -> (Const64 [c << uint64(d)])
(Rsh64x64 (Const64 [c]) (Const64 [d])) -> (Const64 [c >> uint64(d)])
(Rsh64Ux64 (Const64 [c]) (Const64 [d])) -> (Const64 [int64(uint64(c) >> uint64(d))])
(Lsh32x64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(c) << uint64(d))])
(Rsh32x64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(c) >> uint64(d))])
(Rsh32Ux64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(uint32(c) >> uint64(d))])
(Lsh16x64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(c) << uint64(d))])
(Rsh16x64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(c) >> uint64(d))])
(Rsh16Ux64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(uint16(c) >> uint64(d))])
(Lsh8x64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(c) << uint64(d))])
(Rsh8x64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(c) >> uint64(d))])
(Rsh8Ux64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(uint8(c) >> uint64(d))])
(Lsh64x64 (Const64 [0]) _) -> (Const64 [0])
(Rsh64x64 (Const64 [0]) _) -> (Const64 [0])
(Rsh64Ux64 (Const64 [0]) _) -> (Const64 [0])
(Lsh32x64 (Const32 [0]) _) -> (Const32 [0])
(Rsh32x64 (Const32 [0]) _) -> (Const32 [0])
(Rsh32Ux64 (Const32 [0]) _) -> (Const32 [0])
(Lsh16x64 (Const16 [0]) _) -> (Const16 [0])
(Rsh16x64 (Const16 [0]) _) -> (Const16 [0])
(Rsh16Ux64 (Const16 [0]) _) -> (Const16 [0])
(Lsh8x64 (Const8 [0]) _) -> (Const8 [0])
(Rsh8x64 (Const8 [0]) _) -> (Const8 [0])
(Rsh8Ux64 (Const8 [0]) _) -> (Const8 [0])
(IsInBounds (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(inBounds32(c,d))])
(IsInBounds (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(inBounds64(c,d))])
(IsSliceInBounds (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(sliceInBounds32(c,d))])
(IsSliceInBounds (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(sliceInBounds64(c,d))])
(Eq64 x x) -> (ConstBool [1])
(Eq32 x x) -> (ConstBool [1])
(Eq16 x x) -> (ConstBool [1])
(Eq8 x x) -> (ConstBool [1])
(Eq8 (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i((int8(c) != 0) == (int8(d) != 0))])
(Eq8 (ConstBool [0]) x) -> (Not x)
(Eq8 (ConstBool [1]) x) -> x
(Neq64 x x) -> (ConstBool [0])
(Neq32 x x) -> (ConstBool [0])
(Neq16 x x) -> (ConstBool [0])
(Neq8 x x) -> (ConstBool [0])
(Neq8 (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i((int8(c) != 0) != (int8(d) != 0))])
(Neq8 (ConstBool [0]) x) -> x
(Neq8 (ConstBool [1]) x) -> (Not x)
(Eq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) -> (Eq64 (Const64 <t> [c-d]) x)
(Eq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) -> (Eq32 (Const32 <t> [c-d]) x)
(Eq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Eq16 (Const16 <t> [c-d]) x)
(Eq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) -> (Eq8 (Const8 <t> [c-d]) x)
(Neq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) -> (Neq64 (Const64 <t> [c-d]) x)
(Neq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) -> (Neq32 (Const32 <t> [c-d]) x)
(Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) -> (Neq16 (Const16 <t> [c-d]) x)
(Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) -> (Neq8 (Const8 <t> [c-d]) x)
// canonicalize: swap arguments for commutative operations when one argument is a constant.
(Eq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Eq64 (Const64 <t> [c]) x)
(Eq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Eq32 (Const32 <t> [c]) x)
(Eq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Eq16 (Const16 <t> [c]) x)
(Eq8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Eq8 (Const8 <t> [c]) x)
(Eq8 x (ConstBool <t> [c])) && x.Op != OpConstBool -> (Eq8 (ConstBool <t> [c]) x)
(Neq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Neq64 (Const64 <t> [c]) x)
(Neq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Neq32 (Const32 <t> [c]) x)
(Neq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Neq16 (Const16 <t> [c]) x)
(Neq8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Neq8 (Const8 <t> [c]) x)
(Neq8 x (ConstBool <t> [c])) && x.Op != OpConstBool -> (Neq8 (ConstBool <t> [c]) x)
(Add64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [c]) x)
(Add32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [c]) x)
(Add16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Add16 (Const16 <t> [c]) x)
(Add8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Add8 (Const8 <t> [c]) x)
(Mul64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Mul64 (Const64 <t> [c]) x)
(Mul32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Mul32 (Const32 <t> [c]) x)
(Mul16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Mul16 (Const16 <t> [c]) x)
(Mul8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Mul8 (Const8 <t> [c]) x)
(Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [-c]) x)
(Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Add32 (Const32 <t> [-c]) x)
(Sub16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Add16 (Const16 <t> [-c]) x)
(Sub8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Add8 (Const8 <t> [-c]) x)
(And64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (And64 (Const64 <t> [c]) x)
(And32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (And32 (Const32 <t> [c]) x)
(And16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (And16 (Const16 <t> [c]) x)
(And8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (And8 (Const8 <t> [c]) x)
(Or64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Or64 (Const64 <t> [c]) x)
(Or32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Or32 (Const32 <t> [c]) x)
(Or16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Or16 (Const16 <t> [c]) x)
(Or8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Or8 (Const8 <t> [c]) x)
(Xor64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Xor64 (Const64 <t> [c]) x)
(Xor32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Xor32 (Const32 <t> [c]) x)
(Xor16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Xor16 (Const16 <t> [c]) x)
(Xor8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Xor8 (Const8 <t> [c]) x)
// Distribute multiplication c * (d+x) -> c*d + c*x. Useful for:
// a[i].b = ...; a[i+1].b = ...
(Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x)) -> (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
(Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x)) -> (Add32 (Const32 <t> [c*d]) (Mul32 <t> (Const32 <t> [c]) x))
// rewrite shifts of 8/16/32 bit consts into 64 bit consts to reduce
// the number of the other rewrite rules for const shifts
(Lsh64x32 <t> x (Const32 [c])) -> (Lsh64x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh64x16 <t> x (Const16 [c])) -> (Lsh64x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh64x8 <t> x (Const8 [c])) -> (Lsh64x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh64x32 <t> x (Const32 [c])) -> (Rsh64x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh64x16 <t> x (Const16 [c])) -> (Rsh64x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh64x8 <t> x (Const8 [c])) -> (Rsh64x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh64Ux32 <t> x (Const32 [c])) -> (Rsh64Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh64Ux16 <t> x (Const16 [c])) -> (Rsh64Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh64Ux8 <t> x (Const8 [c])) -> (Rsh64Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh32x32 <t> x (Const32 [c])) -> (Lsh32x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh32x16 <t> x (Const16 [c])) -> (Lsh32x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh32x8 <t> x (Const8 [c])) -> (Lsh32x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh32x32 <t> x (Const32 [c])) -> (Rsh32x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh32x16 <t> x (Const16 [c])) -> (Rsh32x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh32x8 <t> x (Const8 [c])) -> (Rsh32x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh32Ux32 <t> x (Const32 [c])) -> (Rsh32Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh32Ux16 <t> x (Const16 [c])) -> (Rsh32Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh32Ux8 <t> x (Const8 [c])) -> (Rsh32Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh16x32 <t> x (Const32 [c])) -> (Lsh16x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh16x16 <t> x (Const16 [c])) -> (Lsh16x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh16x8 <t> x (Const8 [c])) -> (Lsh16x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh16x32 <t> x (Const32 [c])) -> (Rsh16x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh16x16 <t> x (Const16 [c])) -> (Rsh16x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh16x8 <t> x (Const8 [c])) -> (Rsh16x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh16Ux32 <t> x (Const32 [c])) -> (Rsh16Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh16Ux16 <t> x (Const16 [c])) -> (Rsh16Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh16Ux8 <t> x (Const8 [c])) -> (Rsh16Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh8x32 <t> x (Const32 [c])) -> (Lsh8x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh8x16 <t> x (Const16 [c])) -> (Lsh8x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh8x8 <t> x (Const8 [c])) -> (Lsh8x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh8x32 <t> x (Const32 [c])) -> (Rsh8x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh8x16 <t> x (Const16 [c])) -> (Rsh8x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh8x8 <t> x (Const8 [c])) -> (Rsh8x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh8Ux32 <t> x (Const32 [c])) -> (Rsh8Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh8Ux16 <t> x (Const16 [c])) -> (Rsh8Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh8Ux8 <t> x (Const8 [c])) -> (Rsh8Ux64 x (Const64 <t> [int64(uint8(c))]))
// shifts by zero
(Lsh64x64 x (Const64 [0])) -> x
(Rsh64x64 x (Const64 [0])) -> x
(Rsh64Ux64 x (Const64 [0])) -> x
(Lsh32x64 x (Const64 [0])) -> x
(Rsh32x64 x (Const64 [0])) -> x
(Rsh32Ux64 x (Const64 [0])) -> x
(Lsh16x64 x (Const64 [0])) -> x
(Rsh16x64 x (Const64 [0])) -> x
(Rsh16Ux64 x (Const64 [0])) -> x
(Lsh8x64 x (Const64 [0])) -> x
(Rsh8x64 x (Const64 [0])) -> x
(Rsh8Ux64 x (Const64 [0])) -> x
// zero shifted.
// TODO: other bit sizes.
(Lsh64x64 (Const64 [0]) _) -> (Const64 [0])
(Rsh64x64 (Const64 [0]) _) -> (Const64 [0])
(Rsh64Ux64 (Const64 [0]) _) -> (Const64 [0])
(Lsh64x32 (Const64 [0]) _) -> (Const64 [0])
(Rsh64x32 (Const64 [0]) _) -> (Const64 [0])
(Rsh64Ux32 (Const64 [0]) _) -> (Const64 [0])
(Lsh64x16 (Const64 [0]) _) -> (Const64 [0])
(Rsh64x16 (Const64 [0]) _) -> (Const64 [0])
(Rsh64Ux16 (Const64 [0]) _) -> (Const64 [0])
(Lsh64x8 (Const64 [0]) _) -> (Const64 [0])
(Rsh64x8 (Const64 [0]) _) -> (Const64 [0])
(Rsh64Ux8 (Const64 [0]) _) -> (Const64 [0])
// large left shifts of all values, and right shifts of unsigned values
(Lsh64x64 _ (Const64 [c])) && uint64(c) >= 64 -> (Const64 [0])
(Rsh64Ux64 _ (Const64 [c])) && uint64(c) >= 64 -> (Const64 [0])
(Lsh32x64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
(Rsh32Ux64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
(Lsh16x64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
(Rsh16Ux64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
(Lsh8x64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
(Rsh8Ux64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
// combine const shifts
(Lsh64x64 <t> (Lsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh64x64 x (Const64 <t> [c+d]))
(Lsh32x64 <t> (Lsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh32x64 x (Const64 <t> [c+d]))
(Lsh16x64 <t> (Lsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh16x64 x (Const64 <t> [c+d]))
(Lsh8x64 <t> (Lsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Lsh8x64 x (Const64 <t> [c+d]))
(Rsh64x64 <t> (Rsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh64x64 x (Const64 <t> [c+d]))
(Rsh32x64 <t> (Rsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh32x64 x (Const64 <t> [c+d]))
(Rsh16x64 <t> (Rsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh16x64 x (Const64 <t> [c+d]))
(Rsh8x64 <t> (Rsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh8x64 x (Const64 <t> [c+d]))
(Rsh64Ux64 <t> (Rsh64Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh64Ux64 x (Const64 <t> [c+d]))
(Rsh32Ux64 <t> (Rsh32Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh32Ux64 x (Const64 <t> [c+d]))
(Rsh16Ux64 <t> (Rsh16Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh16Ux64 x (Const64 <t> [c+d]))
(Rsh8Ux64 <t> (Rsh8Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) -> (Rsh8Ux64 x (Const64 <t> [c+d]))
// constant comparisons
(Eq64 (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(int64(c) == int64(d))])
(Eq32 (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(int32(c) == int32(d))])
(Eq16 (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(int16(c) == int16(d))])
(Eq8 (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(int8(c) == int8(d))])
(Neq64 (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(int64(c) != int64(d))])
(Neq32 (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(int32(c) != int32(d))])
(Neq16 (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(int16(c) != int16(d))])
(Neq8 (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(int8(c) != int8(d))])
(Greater64 (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(int64(c) > int64(d))])
(Greater32 (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(int32(c) > int32(d))])
(Greater16 (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(int16(c) > int16(d))])
(Greater8 (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(int8(c) > int8(d))])
(Greater64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) > uint64(d))])
(Greater32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) > uint32(d))])
(Greater16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) > uint16(d))])
(Greater8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) > uint8(d))])
(Geq64 (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(int64(c) >= int64(d))])
(Geq32 (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(int32(c) >= int32(d))])
(Geq16 (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(int16(c) >= int16(d))])
(Geq8 (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(int8(c) >= int8(d))])
(Geq64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) >= uint64(d))])
(Geq32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) >= uint32(d))])
(Geq16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) >= uint16(d))])
(Geq8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) >= uint8(d))])
(Less64 (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(int64(c) < int64(d))])
(Less32 (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(int32(c) < int32(d))])
(Less16 (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(int16(c) < int16(d))])
(Less8 (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(int8(c) < int8(d))])
(Less64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) < uint64(d))])
(Less32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) < uint32(d))])
(Less16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) < uint16(d))])
(Less8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) < uint8(d))])
(Leq64 (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(int64(c) <= int64(d))])
(Leq32 (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(int32(c) <= int32(d))])
(Leq16 (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(int16(c) <= int16(d))])
(Leq8 (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(int8(c) <= int8(d))])
(Leq64U (Const64 [c]) (Const64 [d])) -> (ConstBool [b2i(uint64(c) <= uint64(d))])
(Leq32U (Const32 [c]) (Const32 [d])) -> (ConstBool [b2i(uint32(c) <= uint32(d))])
(Leq16U (Const16 [c]) (Const16 [d])) -> (ConstBool [b2i(uint16(c) <= uint16(d))])
(Leq8U (Const8 [c]) (Const8 [d])) -> (ConstBool [b2i(uint8(c) <= uint8(d))])
// simplifications
(Or64 x x) -> x
(Or32 x x) -> x
(Or16 x x) -> x
(Or8 x x) -> x
(Or64 (Const64 [0]) x) -> x
(Or32 (Const32 [0]) x) -> x
(Or16 (Const16 [0]) x) -> x
(Or8 (Const8 [0]) x) -> x
(Or64 (Const64 [-1]) _) -> (Const64 [-1])
(Or32 (Const32 [-1]) _) -> (Const32 [-1])
(Or16 (Const16 [-1]) _) -> (Const16 [-1])
(Or8 (Const8 [-1]) _) -> (Const8 [-1])
(And64 x x) -> x
(And32 x x) -> x
(And16 x x) -> x
(And8 x x) -> x
(And64 (Const64 [-1]) x) -> x
(And32 (Const32 [-1]) x) -> x
(And16 (Const16 [-1]) x) -> x
(And8 (Const8 [-1]) x) -> x
(And64 (Const64 [0]) _) -> (Const64 [0])
(And32 (Const32 [0]) _) -> (Const32 [0])
(And16 (Const16 [0]) _) -> (Const16 [0])
(And8 (Const8 [0]) _) -> (Const8 [0])
(Xor64 x x) -> (Const64 [0])
(Xor32 x x) -> (Const32 [0])
(Xor16 x x) -> (Const16 [0])
(Xor8 x x) -> (Const8 [0])
(Xor64 (Const64 [0]) x) -> x
(Xor32 (Const32 [0]) x) -> x
(Xor16 (Const16 [0]) x) -> x
(Xor8 (Const8 [0]) x) -> x
(Add64 (Const64 [0]) x) -> x
(Add32 (Const32 [0]) x) -> x
(Add16 (Const16 [0]) x) -> x
(Add8 (Const8 [0]) x) -> x
(Sub64 x x) -> (Const64 [0])
(Sub32 x x) -> (Const32 [0])
(Sub16 x x) -> (Const16 [0])
(Sub8 x x) -> (Const8 [0])
(Mul64 (Const64 [0]) _) -> (Const64 [0])
(Mul32 (Const32 [0]) _) -> (Const32 [0])
(Mul16 (Const16 [0]) _) -> (Const16 [0])
(Mul8 (Const8 [0]) _) -> (Const8 [0])
(Com8 (Com8 x)) -> x
(Com16 (Com16 x)) -> x
(Com32 (Com32 x)) -> x
(Com64 (Com64 x)) -> x
(Neg8 (Sub8 x y)) -> (Sub8 y x)
(Neg16 (Sub16 x y)) -> (Sub16 y x)
(Neg32 (Sub32 x y)) -> (Sub32 y x)
(Neg64 (Sub64 x y)) -> (Sub64 y x)
// Rewrite AND of consts as shifts if possible, slightly faster for 32/64 bit operands
// leading zeros can be shifted left, then right
(And64 <t> (Const64 [y]) x) && nlz(y) + nto(y) == 64 -> (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)]))
(And32 <t> (Const32 [y]) x) && nlz(int64(int32(y))) + nto(int64(int32(y))) == 64 -> (Rsh32Ux32 (Lsh32x32 <t> x (Const32 <t> [nlz(int64(int32(y)))-32])) (Const32 <t> [nlz(int64(int32(y)))-32]))
// trailing zeros can be shifted right, then left
(And64 <t> (Const64 [y]) x) && nlo(y) + ntz(y) == 64 -> (Lsh64x64 (Rsh64Ux64 <t> x (Const64 <t> [ntz(y)])) (Const64 <t> [ntz(y)]))
(And32 <t> (Const32 [y]) x) && nlo(int64(int32(y))) + ntz(int64(int32(y))) == 64 -> (Lsh32x32 (Rsh32Ux32 <t> x (Const32 <t> [ntz(int64(int32(y)))])) (Const32 <t> [ntz(int64(int32(y)))]))
// simplifications often used for lengths. e.g. len(s[i:i+5])==5
(Sub64 (Add64 x y) x) -> y
(Sub64 (Add64 x y) y) -> x
(Sub32 (Add32 x y) x) -> y
(Sub32 (Add32 x y) y) -> x
(Sub16 (Add16 x y) x) -> y
(Sub16 (Add16 x y) y) -> x
(Sub8 (Add8 x y) x) -> y
(Sub8 (Add8 x y) y) -> x
// basic phi simplifications
// A Phi of two constants folds to the constant when both values agree
// (comparison is done at the op's width, since auxint stores sign-extended values).
(Phi (Const8 [c]) (Const8 [d])) && int8(c) == int8(d) -> (Const8 [c])
(Phi (Const16 [c]) (Const16 [d])) && int16(c) == int16(d) -> (Const16 [c])
(Phi (Const32 [c]) (Const32 [d])) && int32(c) == int32(d) -> (Const32 [c])
// 64-bit case: repeating [c] makes the pattern matcher require equal auxints
// directly, so no && condition is needed (auxint is already a full-width int64).
(Phi (Const64 [c]) (Const64 [c])) -> (Const64 [c])
// user nil checks
(NeqPtr p (ConstNil)) -> (IsNonNil p)
(NeqPtr (ConstNil) p) -> (IsNonNil p)
(EqPtr p (ConstNil)) -> (Not (IsNonNil p))
(EqPtr (ConstNil) p) -> (Not (IsNonNil p))
// slice and interface comparisons
// The frontend ensures that we can only compare against nil,
// so we need only compare the first word (interface type or slice ptr).
(EqInter x y) -> (EqPtr (ITab x) (ITab y))
(NeqInter x y) -> (NeqPtr (ITab x) (ITab y))
(EqSlice x y) -> (EqPtr (SlicePtr x) (SlicePtr y))
(NeqSlice x y) -> (NeqPtr (SlicePtr x) (SlicePtr y))
// Load of store of same address, with compatibly typed value and same size
(Load <t1> p1 (Store [w] p2 x _)) && isSamePtr(p1,p2) && t1.Compare(x.Type)==CMPeq && w == t1.Size() -> x
// indexing operations
// Note: bounds check has already been done
(ArrayIndex (Load ptr mem) idx) && b == v.Args[0].Block -> (Load (PtrIndex <v.Type.PtrTo()> ptr idx) mem)
(PtrIndex <t> ptr idx) && config.PtrSize == 4 -> (AddPtr ptr (Mul32 <config.fe.TypeInt()> idx (Const32 <config.fe.TypeInt()> [t.Elem().Size()])))
(PtrIndex <t> ptr idx) && config.PtrSize == 8 -> (AddPtr ptr (Mul64 <config.fe.TypeInt()> idx (Const64 <config.fe.TypeInt()> [t.Elem().Size()])))
// struct operations
(StructSelect (StructMake1 x)) -> x
(StructSelect [0] (StructMake2 x _)) -> x
(StructSelect [1] (StructMake2 _ x)) -> x
(StructSelect [0] (StructMake3 x _ _)) -> x
(StructSelect [1] (StructMake3 _ x _)) -> x
(StructSelect [2] (StructMake3 _ _ x)) -> x
(StructSelect [0] (StructMake4 x _ _ _)) -> x
(StructSelect [1] (StructMake4 _ x _ _)) -> x
(StructSelect [2] (StructMake4 _ _ x _)) -> x
(StructSelect [3] (StructMake4 _ _ _ x)) -> x
(Load <t> _ _) && t.IsStruct() && t.NumFields() == 0 && config.fe.CanSSA(t) ->
(StructMake0)
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 1 && config.fe.CanSSA(t) ->
(StructMake1
(Load <t.FieldType(0)> ptr mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 2 && config.fe.CanSSA(t) ->
(StructMake2
(Load <t.FieldType(0)> ptr mem)
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 3 && config.fe.CanSSA(t) ->
(StructMake3
(Load <t.FieldType(0)> ptr mem)
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
(Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem))
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 4 && config.fe.CanSSA(t) ->
(StructMake4
(Load <t.FieldType(0)> ptr mem)
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
(Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem)
(Load <t.FieldType(3)> (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] ptr) mem))
(StructSelect [i] (Load <t> ptr mem)) && !config.fe.CanSSA(t) ->
@v.Args[0].Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(i)] ptr) mem)
(Store _ (StructMake0) mem) -> mem
(Store dst (StructMake1 <t> f0) mem) ->
(Store [t.FieldType(0).Size()] dst f0 mem)
(Store dst (StructMake2 <t> f0 f1) mem) ->
(Store [t.FieldType(1).Size()]
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
(Store [t.FieldType(0).Size()] dst f0 mem))
(Store dst (StructMake3 <t> f0 f1 f2) mem) ->
(Store [t.FieldType(2).Size()]
(OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
f2
(Store [t.FieldType(1).Size()]
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
(Store [t.FieldType(0).Size()] dst f0 mem)))
(Store dst (StructMake4 <t> f0 f1 f2 f3) mem) ->
(Store [t.FieldType(3).Size()]
(OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] dst)
f3
(Store [t.FieldType(2).Size()]
(OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
f2
(Store [t.FieldType(1).Size()]
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
(Store [t.FieldType(0).Size()] dst f0 mem))))
// complex ops
(ComplexReal (ComplexMake real _ )) -> real
(ComplexImag (ComplexMake _ imag )) -> imag
(Load <t> ptr mem) && t.IsComplex() && t.Size() == 8 ->
(ComplexMake
(Load <config.fe.TypeFloat32()> ptr mem)
(Load <config.fe.TypeFloat32()>
(OffPtr <config.fe.TypeFloat32().PtrTo()> [4] ptr)
mem)
)
(Store [8] dst (ComplexMake real imag) mem) ->
(Store [4]
(OffPtr <config.fe.TypeFloat32().PtrTo()> [4] dst)
imag
(Store [4] dst real mem))
(Load <t> ptr mem) && t.IsComplex() && t.Size() == 16 ->
(ComplexMake
(Load <config.fe.TypeFloat64()> ptr mem)
(Load <config.fe.TypeFloat64()>
(OffPtr <config.fe.TypeFloat64().PtrTo()> [8] ptr)
mem)
)
(Store [16] dst (ComplexMake real imag) mem) ->
(Store [8]
(OffPtr <config.fe.TypeFloat64().PtrTo()> [8] dst)
imag
(Store [8] dst real mem))
// string ops
(StringPtr (StringMake ptr _)) -> ptr
(StringLen (StringMake _ len)) -> len
(ConstString {s}) && config.PtrSize == 4 && s.(string) == "" ->
(StringMake (ConstNil) (Const32 <config.fe.TypeInt()> [0]))
(ConstString {s}) && config.PtrSize == 8 && s.(string) == "" ->
(StringMake (ConstNil) (Const64 <config.fe.TypeInt()> [0]))
(ConstString {s}) && config.PtrSize == 4 && s.(string) != "" ->
(StringMake
(Addr <config.fe.TypeBytePtr()> {config.fe.StringData(s.(string))}
(SB))
(Const32 <config.fe.TypeInt()> [int64(len(s.(string)))]))
(ConstString {s}) && config.PtrSize == 8 && s.(string) != "" ->
(StringMake
(Addr <config.fe.TypeBytePtr()> {config.fe.StringData(s.(string))}
(SB))
(Const64 <config.fe.TypeInt()> [int64(len(s.(string)))]))
(Load <t> ptr mem) && t.IsString() ->
(StringMake
(Load <config.fe.TypeBytePtr()> ptr mem)
(Load <config.fe.TypeInt()>
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] ptr)
mem))
(Store [2*config.PtrSize] dst (StringMake ptr len) mem) ->
(Store [config.PtrSize]
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] dst)
len
(Store [config.PtrSize] dst ptr mem))
// slice ops
(SlicePtr (SliceMake ptr _ _ )) -> ptr
(SliceLen (SliceMake _ len _)) -> len
(SliceCap (SliceMake _ _ cap)) -> cap
(ConstSlice) && config.PtrSize == 4 ->
(SliceMake
(ConstNil <config.fe.TypeBytePtr()>)
(Const32 <config.fe.TypeInt()> [0])
(Const32 <config.fe.TypeInt()> [0]))
(ConstSlice) && config.PtrSize == 8 ->
(SliceMake
(ConstNil <config.fe.TypeBytePtr()>)
(Const64 <config.fe.TypeInt()> [0])
(Const64 <config.fe.TypeInt()> [0]))
(Load <t> ptr mem) && t.IsSlice() ->
(SliceMake
(Load <config.fe.TypeBytePtr()> ptr mem)
(Load <config.fe.TypeInt()>
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] ptr)
mem)
(Load <config.fe.TypeInt()>
(OffPtr <config.fe.TypeInt().PtrTo()> [2*config.PtrSize] ptr)
mem))
(Store [3*config.PtrSize] dst (SliceMake ptr len cap) mem) ->
(Store [config.PtrSize]
(OffPtr <config.fe.TypeInt().PtrTo()> [2*config.PtrSize] dst)
cap
(Store [config.PtrSize]
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] dst)
len
(Store [config.PtrSize] dst ptr mem)))
// interface ops
(ITab (IMake itab _)) -> itab
(IData (IMake _ data)) -> data
(ConstInterface) ->
(IMake
(ConstNil <config.fe.TypeBytePtr()>)
(ConstNil <config.fe.TypeBytePtr()>))
(Load <t> ptr mem) && t.IsInterface() ->
(IMake
(Load <config.fe.TypeBytePtr()> ptr mem)
(Load <config.fe.TypeBytePtr()>
(OffPtr <config.fe.TypeBytePtr().PtrTo()> [config.PtrSize] ptr)
mem))
(Store [2*config.PtrSize] dst (IMake itab data) mem) ->
(Store [config.PtrSize]
(OffPtr <config.fe.TypeBytePtr().PtrTo()> [config.PtrSize] dst)
data
(Store [config.PtrSize] dst itab mem))
// un-SSAable values use mem->mem copies
(Store [size] dst (Load <t> src mem) mem) && !config.fe.CanSSA(t) -> (Move [size] dst src mem)
(Store [size] dst (Load <t> src mem) (VarDef {x} mem)) && !config.fe.CanSSA(t) -> (Move [size] dst src (VarDef {x} mem))
(Check (NilCheck (GetG _) _) next) -> (Plain nil next)
(If (Not cond) yes no) -> (If cond no yes)
(If (ConstBool [c]) yes no) && c == 1 -> (First nil yes no)
(If (ConstBool [c]) yes no) && c == 0 -> (First nil no yes)
// Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
(Convert (Add64 (Convert ptr mem) off) mem) -> (Add64 ptr off)
(Convert (Add64 off (Convert ptr mem)) mem) -> (Add64 ptr off)
(Convert (Convert ptr mem) mem) -> ptr
// Decompose compound argument values
(Arg {n} [off]) && v.Type.IsString() ->
(StringMake
(Arg <config.fe.TypeBytePtr()> {n} [off])
(Arg <config.fe.TypeInt()> {n} [off+config.PtrSize]))
(Arg {n} [off]) && v.Type.IsSlice() ->
(SliceMake
(Arg <config.fe.TypeBytePtr()> {n} [off])
(Arg <config.fe.TypeInt()> {n} [off+config.PtrSize])
(Arg <config.fe.TypeInt()> {n} [off+2*config.PtrSize]))
(Arg {n} [off]) && v.Type.IsInterface() ->
(IMake
(Arg <config.fe.TypeBytePtr()> {n} [off])
(Arg <config.fe.TypeBytePtr()> {n} [off+config.PtrSize]))
(Arg {n} [off]) && v.Type.IsComplex() && v.Type.Size() == 16 ->
(ComplexMake
(Arg <config.fe.TypeFloat64()> {n} [off])
(Arg <config.fe.TypeFloat64()> {n} [off+8]))
(Arg {n} [off]) && v.Type.IsComplex() && v.Type.Size() == 8 ->
(ComplexMake
(Arg <config.fe.TypeFloat32()> {n} [off])
(Arg <config.fe.TypeFloat32()> {n} [off+4]))
(Arg <t>) && t.IsStruct() && t.NumFields() == 0 && config.fe.CanSSA(t) ->
(StructMake0)
(Arg <t> {n} [off]) && t.IsStruct() && t.NumFields() == 1 && config.fe.CanSSA(t) ->
(StructMake1
(Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)]))
(Arg <t> {n} [off]) && t.IsStruct() && t.NumFields() == 2 && config.fe.CanSSA(t) ->
(StructMake2
(Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)])
(Arg <t.FieldType(1)> {n} [off+t.FieldOff(1)]))
(Arg <t> {n} [off]) && t.IsStruct() && t.NumFields() == 3 && config.fe.CanSSA(t) ->
(StructMake3
(Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)])
(Arg <t.FieldType(1)> {n} [off+t.FieldOff(1)])
(Arg <t.FieldType(2)> {n} [off+t.FieldOff(2)]))
(Arg <t> {n} [off]) && t.IsStruct() && t.NumFields() == 4 && config.fe.CanSSA(t) ->
(StructMake4
(Arg <t.FieldType(0)> {n} [off+t.FieldOff(0)])
(Arg <t.FieldType(1)> {n} [off+t.FieldOff(1)])
(Arg <t.FieldType(2)> {n} [off+t.FieldOff(2)])
(Arg <t.FieldType(3)> {n} [off+t.FieldOff(3)]))
// strength reduction of divide by a constant.
// Note: frontend does <=32 bits. We only need to do 64 bits here.
// TODO: Do them all here?
// Div/mod by 1. Currently handled by frontend.
//(Div64 n (Const64 [1])) -> n
//(Div64u n (Const64 [1])) -> n
//(Mod64 n (Const64 [1])) -> (Const64 [0])
//(Mod64u n (Const64 [1])) -> (Const64 [0])
// Unsigned divide by power of 2. Currently handled by frontend.
//(Div64u <t> n (Const64 [c])) && isPowerOfTwo(c) -> (Rsh64Ux64 n (Const64 <t> [log2(c)]))
//(Mod64u <t> n (Const64 [c])) && isPowerOfTwo(c) -> (And64 n (Const64 <t> [c-1]))
// Signed divide by power of 2. Currently handled by frontend.
// n / c = n >> log(c) if n >= 0
// = (n+c-1) >> log(c) if n < 0
// We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
//(Div64 <t> n (Const64 [c])) && isPowerOfTwo(c) ->
// (Rsh64x64
// (Add64 <t>
// n
// (Rsh64Ux64 <t>
// (Rsh64x64 <t> n (Const64 <t> [63]))
// (Const64 <t> [64-log2(c)])))
// (Const64 <t> [log2(c)]))
// Unsigned divide, not a power of 2. Strength reduce to a multiply.
(Div64u <t> x (Const64 [c])) && umagic64ok(c) && !umagic64a(c) ->
(Rsh64Ux64
(Hmul64u <t>
(Const64 <t> [umagic64m(c)])
x)
(Const64 <t> [umagic64s(c)]))
(Div64u <t> x (Const64 [c])) && umagic64ok(c) && umagic64a(c) ->
(Rsh64Ux64
(Avg64u <t>
(Hmul64u <t>
x
(Const64 <t> [umagic64m(c)]))
x)
(Const64 <t> [umagic64s(c)-1]))
// Signed divide, not a power of 2. Strength reduce to a multiply.
(Div64 <t> x (Const64 [c])) && c > 0 && smagic64ok(c) && smagic64m(c) > 0 ->
(Sub64 <t>
(Rsh64x64 <t>
(Hmul64 <t>
(Const64 <t> [smagic64m(c)])
x)
(Const64 <t> [smagic64s(c)]))
(Rsh64x64 <t>
x
(Const64 <t> [63])))
(Div64 <t> x (Const64 [c])) && c > 0 && smagic64ok(c) && smagic64m(c) < 0 ->
(Sub64 <t>
(Rsh64x64 <t>
(Add64 <t>
(Hmul64 <t>
(Const64 <t> [smagic64m(c)])
x)
x)
(Const64 <t> [smagic64s(c)]))
(Rsh64x64 <t>
x
(Const64 <t> [63])))
(Div64 <t> x (Const64 [c])) && c < 0 && smagic64ok(c) && smagic64m(c) > 0 ->
(Neg64 <t>
(Sub64 <t>
(Rsh64x64 <t>
(Hmul64 <t>
(Const64 <t> [smagic64m(c)])
x)
(Const64 <t> [smagic64s(c)]))
(Rsh64x64 <t>
x
(Const64 <t> [63]))))
(Div64 <t> x (Const64 [c])) && c < 0 && smagic64ok(c) && smagic64m(c) < 0 ->
(Neg64 <t>
(Sub64 <t>
(Rsh64x64 <t>
(Add64 <t>
(Hmul64 <t>
(Const64 <t> [smagic64m(c)])
x)
x)
(Const64 <t> [smagic64s(c)]))
(Rsh64x64 <t>
x
(Const64 <t> [63]))))
// A%B = A-(A/B*B).
// This implements % with two * and a bunch of ancillary ops.
// One of the * is free if the user's code also computes A/B.
(Mod64 <t> x (Const64 [c])) && smagic64ok(c) -> (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
(Mod64u <t> x (Const64 [c])) && umagic64ok(c) -> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))

View File

@ -0,0 +1,416 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
var genericOps = []opData{
// 2-input arithmetic
// Types must be consistent with Go typing. Add, for example, must take two values
// of the same type and produces that same type.
{name: "Add8", argLength: 2, commutative: true}, // arg0 + arg1
{name: "Add16", argLength: 2, commutative: true},
{name: "Add32", argLength: 2, commutative: true},
{name: "Add64", argLength: 2, commutative: true},
{name: "AddPtr", argLength: 2}, // For address calculations. arg0 is a pointer and arg1 is an int.
{name: "Add32F", argLength: 2},
{name: "Add64F", argLength: 2},
// TODO: Add64C, Add128C
{name: "Sub8", argLength: 2}, // arg0 - arg1
{name: "Sub16", argLength: 2},
{name: "Sub32", argLength: 2},
{name: "Sub64", argLength: 2},
{name: "SubPtr", argLength: 2},
{name: "Sub32F", argLength: 2},
{name: "Sub64F", argLength: 2},
{name: "Mul8", argLength: 2, commutative: true}, // arg0 * arg1
{name: "Mul16", argLength: 2, commutative: true},
{name: "Mul32", argLength: 2, commutative: true},
{name: "Mul64", argLength: 2, commutative: true},
{name: "Mul32F", argLength: 2},
{name: "Mul64F", argLength: 2},
{name: "Div32F", argLength: 2}, // arg0 / arg1
{name: "Div64F", argLength: 2},
{name: "Hmul8", argLength: 2}, // (arg0 * arg1) >> width
{name: "Hmul8u", argLength: 2},
{name: "Hmul16", argLength: 2},
{name: "Hmul16u", argLength: 2},
{name: "Hmul32", argLength: 2},
{name: "Hmul32u", argLength: 2},
{name: "Hmul64", argLength: 2},
{name: "Hmul64u", argLength: 2},
// Weird special instruction for strength reduction of divides.
{name: "Avg64u", argLength: 2}, // (uint64(arg0) + uint64(arg1)) / 2, correct to all 64 bits.
{name: "Div8", argLength: 2}, // arg0 / arg1
{name: "Div8u", argLength: 2},
{name: "Div16", argLength: 2},
{name: "Div16u", argLength: 2},
{name: "Div32", argLength: 2},
{name: "Div32u", argLength: 2},
{name: "Div64", argLength: 2},
{name: "Div64u", argLength: 2},
{name: "Mod8", argLength: 2}, // arg0 % arg1
{name: "Mod8u", argLength: 2},
{name: "Mod16", argLength: 2},
{name: "Mod16u", argLength: 2},
{name: "Mod32", argLength: 2},
{name: "Mod32u", argLength: 2},
{name: "Mod64", argLength: 2},
{name: "Mod64u", argLength: 2},
{name: "And8", argLength: 2, commutative: true}, // arg0 & arg1
{name: "And16", argLength: 2, commutative: true},
{name: "And32", argLength: 2, commutative: true},
{name: "And64", argLength: 2, commutative: true},
{name: "Or8", argLength: 2, commutative: true}, // arg0 | arg1
{name: "Or16", argLength: 2, commutative: true},
{name: "Or32", argLength: 2, commutative: true},
{name: "Or64", argLength: 2, commutative: true},
{name: "Xor8", argLength: 2, commutative: true}, // arg0 ^ arg1
{name: "Xor16", argLength: 2, commutative: true},
{name: "Xor32", argLength: 2, commutative: true},
{name: "Xor64", argLength: 2, commutative: true},
// For shifts, AxB means the shifted value has A bits and the shift amount has B bits.
{name: "Lsh8x8", argLength: 2}, // arg0 << arg1
{name: "Lsh8x16", argLength: 2},
{name: "Lsh8x32", argLength: 2},
{name: "Lsh8x64", argLength: 2},
{name: "Lsh16x8", argLength: 2},
{name: "Lsh16x16", argLength: 2},
{name: "Lsh16x32", argLength: 2},
{name: "Lsh16x64", argLength: 2},
{name: "Lsh32x8", argLength: 2},
{name: "Lsh32x16", argLength: 2},
{name: "Lsh32x32", argLength: 2},
{name: "Lsh32x64", argLength: 2},
{name: "Lsh64x8", argLength: 2},
{name: "Lsh64x16", argLength: 2},
{name: "Lsh64x32", argLength: 2},
{name: "Lsh64x64", argLength: 2},
{name: "Rsh8x8", argLength: 2}, // arg0 >> arg1, signed
{name: "Rsh8x16", argLength: 2},
{name: "Rsh8x32", argLength: 2},
{name: "Rsh8x64", argLength: 2},
{name: "Rsh16x8", argLength: 2},
{name: "Rsh16x16", argLength: 2},
{name: "Rsh16x32", argLength: 2},
{name: "Rsh16x64", argLength: 2},
{name: "Rsh32x8", argLength: 2},
{name: "Rsh32x16", argLength: 2},
{name: "Rsh32x32", argLength: 2},
{name: "Rsh32x64", argLength: 2},
{name: "Rsh64x8", argLength: 2},
{name: "Rsh64x16", argLength: 2},
{name: "Rsh64x32", argLength: 2},
{name: "Rsh64x64", argLength: 2},
{name: "Rsh8Ux8", argLength: 2}, // arg0 >> arg1, unsigned
{name: "Rsh8Ux16", argLength: 2},
{name: "Rsh8Ux32", argLength: 2},
{name: "Rsh8Ux64", argLength: 2},
{name: "Rsh16Ux8", argLength: 2},
{name: "Rsh16Ux16", argLength: 2},
{name: "Rsh16Ux32", argLength: 2},
{name: "Rsh16Ux64", argLength: 2},
{name: "Rsh32Ux8", argLength: 2},
{name: "Rsh32Ux16", argLength: 2},
{name: "Rsh32Ux32", argLength: 2},
{name: "Rsh32Ux64", argLength: 2},
{name: "Rsh64Ux8", argLength: 2},
{name: "Rsh64Ux16", argLength: 2},
{name: "Rsh64Ux32", argLength: 2},
{name: "Rsh64Ux64", argLength: 2},
// (Left) rotates replace pattern matches in the front end
// of (arg0 << arg1) ^ (arg0 >> (A-arg1))
// where A is the bit width of arg0 and result.
// Note that because rotates are pattern-matched from
// shifts, that a rotate of arg1=A+k (k > 0) bits originated from
// (arg0 << A+k) ^ (arg0 >> -k) =
// 0 ^ arg0>>huge_unsigned =
// 0 ^ 0 = 0
// which is not the same as a rotation by A+k
//
// However, in the specific case of k = 0, the result of
// the shift idiom is the same as the result for the
// rotate idiom, i.e., result=arg0.
// This is different from shifts, where
// arg0 << A is defined to be zero.
//
// Because of this, and also because the primary use case
// for rotates is hashing and crypto code with constant
// distance, rotate instructions are only substituted
// when arg1 is a constant between 1 and A-1, inclusive.
{name: "Lrot8", argLength: 1, aux: "Int64"},
{name: "Lrot16", argLength: 1, aux: "Int64"},
{name: "Lrot32", argLength: 1, aux: "Int64"},
{name: "Lrot64", argLength: 1, aux: "Int64"},
// 2-input comparisons
{name: "Eq8", argLength: 2, commutative: true}, // arg0 == arg1
{name: "Eq16", argLength: 2, commutative: true},
{name: "Eq32", argLength: 2, commutative: true},
{name: "Eq64", argLength: 2, commutative: true},
{name: "EqPtr", argLength: 2, commutative: true},
{name: "EqInter", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
{name: "EqSlice", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
{name: "Eq32F", argLength: 2},
{name: "Eq64F", argLength: 2},
{name: "Neq8", argLength: 2, commutative: true}, // arg0 != arg1
{name: "Neq16", argLength: 2, commutative: true},
{name: "Neq32", argLength: 2, commutative: true},
{name: "Neq64", argLength: 2, commutative: true},
{name: "NeqPtr", argLength: 2, commutative: true},
{name: "NeqInter", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
{name: "NeqSlice", argLength: 2}, // arg0 or arg1 is nil; other cases handled by frontend
{name: "Neq32F", argLength: 2},
{name: "Neq64F", argLength: 2},
{name: "Less8", argLength: 2}, // arg0 < arg1
{name: "Less8U", argLength: 2},
{name: "Less16", argLength: 2},
{name: "Less16U", argLength: 2},
{name: "Less32", argLength: 2},
{name: "Less32U", argLength: 2},
{name: "Less64", argLength: 2},
{name: "Less64U", argLength: 2},
{name: "Less32F", argLength: 2},
{name: "Less64F", argLength: 2},
{name: "Leq8", argLength: 2}, // arg0 <= arg1
{name: "Leq8U", argLength: 2},
{name: "Leq16", argLength: 2},
{name: "Leq16U", argLength: 2},
{name: "Leq32", argLength: 2},
{name: "Leq32U", argLength: 2},
{name: "Leq64", argLength: 2},
{name: "Leq64U", argLength: 2},
{name: "Leq32F", argLength: 2},
{name: "Leq64F", argLength: 2},
{name: "Greater8", argLength: 2}, // arg0 > arg1
{name: "Greater8U", argLength: 2},
{name: "Greater16", argLength: 2},
{name: "Greater16U", argLength: 2},
{name: "Greater32", argLength: 2},
{name: "Greater32U", argLength: 2},
{name: "Greater64", argLength: 2},
{name: "Greater64U", argLength: 2},
{name: "Greater32F", argLength: 2},
{name: "Greater64F", argLength: 2},
{name: "Geq8", argLength: 2}, // arg0 >= arg1
{name: "Geq8U", argLength: 2},
{name: "Geq16", argLength: 2},
{name: "Geq16U", argLength: 2},
{name: "Geq32", argLength: 2},
{name: "Geq32U", argLength: 2},
{name: "Geq64", argLength: 2},
{name: "Geq64U", argLength: 2},
{name: "Geq32F", argLength: 2},
{name: "Geq64F", argLength: 2},
// 1-input ops
{name: "Not", argLength: 1}, // !arg0
{name: "Neg8", argLength: 1}, // -arg0
{name: "Neg16", argLength: 1},
{name: "Neg32", argLength: 1},
{name: "Neg64", argLength: 1},
{name: "Neg32F", argLength: 1},
{name: "Neg64F", argLength: 1},
{name: "Com8", argLength: 1}, // ^arg0
{name: "Com16", argLength: 1},
{name: "Com32", argLength: 1},
{name: "Com64", argLength: 1},
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
// Data movement, max argument length for Phi is indefinite so just pick
// a really large number
{name: "Phi", argLength: -1}, // select an argument based on which predecessor block we came from
{name: "Copy", argLength: 1}, // output = arg0
// Convert converts between pointers and integers.
// We have a special op for this so as to not confuse GC
// (particularly stack maps). It takes a memory arg so it
// gets correctly ordered with respect to GC safepoints.
// arg0=ptr/int arg1=mem, output=int/ptr
{name: "Convert", argLength: 2},
// constants. Constant values are stored in the aux or
// auxint fields.
{name: "ConstBool", aux: "Bool"}, // auxint is 0 for false and 1 for true
{name: "ConstString", aux: "String"}, // value is aux.(string)
{name: "ConstNil", typ: "BytePtr"}, // nil pointer
{name: "Const8", aux: "Int8"}, // value is low 8 bits of auxint
{name: "Const16", aux: "Int16"}, // value is low 16 bits of auxint
{name: "Const32", aux: "Int32"}, // value is low 32 bits of auxint
{name: "Const64", aux: "Int64"}, // value is auxint
{name: "Const32F", aux: "Float"}, // value is math.Float64frombits(uint64(auxint))
{name: "Const64F", aux: "Float"}, // value is math.Float64frombits(uint64(auxint))
{name: "ConstInterface"}, // nil interface
{name: "ConstSlice"}, // nil slice
// Constant-like things
{name: "InitMem"}, // memory input to the function.
{name: "Arg", aux: "SymOff"}, // argument to the function. aux=GCNode of arg, off = offset in that arg.
// The address of a variable. arg0 is the base pointer (SB or SP, depending
// on whether it is a global or stack variable). The Aux field identifies the
// variable. It will be either an *ExternSymbol (with arg0=SB), *ArgSymbol (arg0=SP),
// or *AutoSymbol (arg0=SP).
{name: "Addr", argLength: 1, aux: "Sym"}, // Address of a variable. Arg0=SP or SB. Aux identifies the variable.
{name: "SP"}, // stack pointer
{name: "SB", typ: "Uintptr"}, // static base pointer (a.k.a. globals pointer)
{name: "Func", aux: "Sym"}, // entry address of a function
// Memory operations
{name: "Load", argLength: 2}, // Load from arg0. arg1=memory
{name: "Store", argLength: 3, typ: "Mem", aux: "Int64"}, // Store arg1 to arg0. arg2=memory, auxint=size. Returns memory.
{name: "Move", argLength: 3, aux: "Int64"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size. Returns memory.
{name: "Zero", argLength: 2, aux: "Int64"}, // arg0=destptr, arg1=mem, auxint=size. Returns memory.
// Function calls. Arguments to the call have already been written to the stack.
// Return values appear on the stack. The method receiver, if any, is treated
// as a phantom first argument.
{name: "ClosureCall", argLength: 3, aux: "Int64"}, // arg0=code pointer, arg1=context ptr, arg2=memory. auxint=arg size. Returns memory.
{name: "StaticCall", argLength: 1, aux: "SymOff"}, // call function aux.(*gc.Sym), arg0=memory. auxint=arg size. Returns memory.
{name: "DeferCall", argLength: 1, aux: "Int64"}, // defer call. arg0=memory, auxint=arg size. Returns memory.
{name: "GoCall", argLength: 1, aux: "Int64"}, // go call. arg0=memory, auxint=arg size. Returns memory.
{name: "InterCall", argLength: 2, aux: "Int64"}, // interface call. arg0=code pointer, arg1=memory, auxint=arg size. Returns memory.
// Conversions: signed extensions, zero (unsigned) extensions, truncations
{name: "SignExt8to16", argLength: 1, typ: "Int16"},
{name: "SignExt8to32", argLength: 1},
{name: "SignExt8to64", argLength: 1},
{name: "SignExt16to32", argLength: 1},
{name: "SignExt16to64", argLength: 1},
{name: "SignExt32to64", argLength: 1},
{name: "ZeroExt8to16", argLength: 1, typ: "UInt16"},
{name: "ZeroExt8to32", argLength: 1},
{name: "ZeroExt8to64", argLength: 1},
{name: "ZeroExt16to32", argLength: 1},
{name: "ZeroExt16to64", argLength: 1},
{name: "ZeroExt32to64", argLength: 1},
{name: "Trunc16to8", argLength: 1},
{name: "Trunc32to8", argLength: 1},
{name: "Trunc32to16", argLength: 1},
{name: "Trunc64to8", argLength: 1},
{name: "Trunc64to16", argLength: 1},
{name: "Trunc64to32", argLength: 1},
{name: "Cvt32to32F", argLength: 1},
{name: "Cvt32to64F", argLength: 1},
{name: "Cvt64to32F", argLength: 1},
{name: "Cvt64to64F", argLength: 1},
{name: "Cvt32Fto32", argLength: 1},
{name: "Cvt32Fto64", argLength: 1},
{name: "Cvt64Fto32", argLength: 1},
{name: "Cvt64Fto64", argLength: 1},
{name: "Cvt32Fto64F", argLength: 1},
{name: "Cvt64Fto32F", argLength: 1},
// Automatically inserted safety checks
{name: "IsNonNil", argLength: 1, typ: "Bool"}, // arg0 != nil
{name: "IsInBounds", argLength: 2, typ: "Bool"}, // 0 <= arg0 < arg1
{name: "IsSliceInBounds", argLength: 2, typ: "Bool"}, // 0 <= arg0 <= arg1
{name: "NilCheck", argLength: 2, typ: "Void"}, // arg0=ptr, arg1=mem. Panics if arg0 is nil, returns void.
// Pseudo-ops
{name: "GetG", argLength: 1}, // runtime.getg() (read g pointer). arg0=mem
{name: "GetClosurePtr"}, // get closure pointer from dedicated register
// Indexing operations
{name: "ArrayIndex", argLength: 2}, // arg0=array, arg1=index. Returns a[i]
{name: "PtrIndex", argLength: 2}, // arg0=ptr, arg1=index. Computes ptr+sizeof(*v.type)*index, where index is extended to ptrwidth type
{name: "OffPtr", argLength: 1, aux: "Int64"}, // arg0 + auxint (arg0 and result are pointers)
// Slices
{name: "SliceMake", argLength: 3}, // arg0=ptr, arg1=len, arg2=cap
{name: "SlicePtr", argLength: 1, typ: "BytePtr"}, // ptr(arg0)
{name: "SliceLen", argLength: 1}, // len(arg0)
{name: "SliceCap", argLength: 1}, // cap(arg0)
// Complex (part/whole)
{name: "ComplexMake", argLength: 2}, // arg0=real, arg1=imag
{name: "ComplexReal", argLength: 1}, // real(arg0)
{name: "ComplexImag", argLength: 1}, // imag(arg0)
// Strings
{name: "StringMake", argLength: 2}, // arg0=ptr, arg1=len
{name: "StringPtr", argLength: 1}, // ptr(arg0)
{name: "StringLen", argLength: 1}, // len(arg0)
// Interfaces
{name: "IMake", argLength: 2}, // arg0=itab, arg1=data
{name: "ITab", argLength: 1, typ: "BytePtr"}, // arg0=interface, returns itable field
{name: "IData", argLength: 1}, // arg0=interface, returns data field
// Structs
{name: "StructMake0"}, // Returns struct with 0 fields.
{name: "StructMake1", argLength: 1}, // arg0=field0. Returns struct.
{name: "StructMake2", argLength: 2}, // arg0,arg1=field0,field1. Returns struct.
{name: "StructMake3", argLength: 3}, // arg0..2=field0..2. Returns struct.
{name: "StructMake4", argLength: 4}, // arg0..3=field0..3. Returns struct.
{name: "StructSelect", argLength: 1, aux: "Int64"}, // arg0=struct, auxint=field index. Returns the auxint'th field.
// Spill&restore ops for the register allocator. These are
// semantically identical to OpCopy; they do not take/return
// stores like regular memory ops do. We can get away without memory
// args because we know there is no aliasing of spill slots on the stack.
{name: "StoreReg", argLength: 1},
{name: "LoadReg", argLength: 1},
// Used during ssa construction. Like Copy, but the arg has not been specified yet.
{name: "FwdRef"},
// Unknown value. Used for Values whose values don't matter because they are dead code.
{name: "Unknown"},
{name: "VarDef", argLength: 1, aux: "Sym", typ: "Mem"}, // aux is a *gc.Node of a variable that is about to be initialized. arg0=mem, returns mem
{name: "VarKill", argLength: 1, aux: "Sym"}, // aux is a *gc.Node of a variable that is known to be dead. arg0=mem, returns mem
{name: "VarLive", argLength: 1, aux: "Sym"}, // aux is a *gc.Node of a variable that must be kept live. arg0=mem, returns mem
}
// kind control successors implicit exit
// ----------------------------------------------------------
// Exit return mem [] yes
// Ret return mem [] yes
// RetJmp return mem [] yes
// Plain nil [next]
// If a boolean Value [then, else]
// Call mem [next] yes (control opcode should be OpCall or OpStaticCall)
// Check void [next] yes (control opcode should be Op{Lowered}NilCheck)
// First nil [always,never]
// genericBlocks lists the machine-independent block kinds; the table
// above documents each kind's control value and successor behavior.
var genericBlocks = []blockData{
	{name: "Plain"},  // a single successor
	{name: "If"},     // 2 successors, if control goto Succs[0] else goto Succs[1]
	{name: "Call"},   // 1 successor, control is call op (of memory type)
	{name: "Check"},  // 1 successor, control is nilcheck op (of void type)
	{name: "Ret"},    // no successors, control value is memory result
	{name: "RetJmp"}, // no successors, jumps to b.Aux.(*gc.Sym)
	{name: "Exit"},   // no successors, control value generates a panic

	// transient block states used for dead code removal
	{name: "First"}, // 2 successors, always takes the first one (second is dead)
	{name: "Dead"},  // no successors; determined to be dead but not yet removed
}
func init() {
archs = append(archs, arch{"generic", genericOps, genericBlocks, nil})
}

View File

@ -0,0 +1,262 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The gen command generates Go code (in the parent directory) for all
// the architecture-specific opcodes, blocks, and rewrites.
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"io/ioutil"
"log"
"regexp"
"sort"
)
// arch describes one target architecture (or the machine-independent
// "generic" pseudo-architecture): its opcodes, block kinds, and
// register names.
type arch struct {
	name     string      // architecture name, e.g. "AMD64" or "generic"
	ops      []opData    // opcodes defined by this architecture
	blocks   []blockData // block kinds defined by this architecture
	regnames []string    // register names, indexed by bit position in a regMask
}
// opData describes one opcode: its register constraints, assembly
// mnemonic, and the flags that genOp emits into the opcodeTable.
type opData struct {
	name              string
	reg               regInfo // register constraints for inputs, outputs, and clobbers
	asm               string  // assembly mnemonic suffix (emitted as x86.A<asm>); empty if none
	typ               string  // default result type
	aux               string  // aux field type name (emitted as aux<aux>); empty if none
	rematerializeable bool    // op can be recomputed instead of spilled/restored (must not clobber registers)
	argLength         int32   // number of arguments, if -1, then this operation has a variable number of arguments
	commutative       bool    // this operation is commutative (e.g. addition)
}
// blockData names one block kind; genOp derives the Block* enum entry
// and its string name from it.
type blockData struct {
	name string
}
// regInfo describes an op's register constraints.
type regInfo struct {
	inputs   []regMask // allowed registers for each input, indexed by argument position
	clobbers regMask   // registers whose contents the op destroys
	outputs  []regMask // allowed registers for each output
}
// regMask is a bit set of machine registers; bit i corresponds to
// regnames[i] of the owning arch.
type regMask uint64
// regMaskComment renders r as a Go line comment naming every register
// whose bit is set (e.g. " // AX CX"). It returns the empty string when
// r is zero.
func (a arch) regMaskComment(r regMask) string {
	var out bytes.Buffer
	for bit := 0; r != 0; bit++ {
		if r&1 == 1 {
			if out.Len() == 0 {
				out.WriteString(" //")
			}
			out.WriteString(" " + a.regnames[bit])
		}
		r >>= 1
	}
	return out.String()
}
// archs accumulates every architecture registered by the init
// functions in the per-arch *Ops.go files.
var archs []arch
// main drives code generation: it emits the opcode tables (opGen.go)
// and then the per-architecture rewrite files.
func main() {
	flag.Parse()
	for _, gen := range []func(){genOp, genLower} {
		gen()
	}
}
// genOp writes ../opGen.go: the BlockKind and Op enumerations with
// their String methods, and the opcodeTable carrying each op's flags,
// register constraints, and assembly opcode, all derived from the
// descriptions registered in archs. It then sanity-checks that
// ../../gc/ssa.go mentions every arch-specific opcode.
func genOp() {
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "// autogenerated: do not edit!\n")
	fmt.Fprintf(w, "// generated from gen/*Ops.go\n")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "package ssa")
	fmt.Fprintln(w, "import \"cmd/internal/obj/x86\"")
	// generate Block* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "BlockInvalid BlockKind = iota")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, d := range a.blocks {
			fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name)
		}
	}
	fmt.Fprintln(w, ")")
	// generate block kind string method
	fmt.Fprintln(w, "var blockString = [...]string{")
	fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, b := range a.blocks {
			fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name)
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}")
	// generate Op* declarations
	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "OpInvalid Op = iota")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, v := range a.ops {
			fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name)
		}
	}
	fmt.Fprintln(w, ")")
	// generate OpInfo table
	fmt.Fprintln(w, "var opcodeTable = [...]opInfo{")
	fmt.Fprintln(w, " { name: \"OpInvalid\" },")
	for _, a := range archs {
		fmt.Fprintln(w)
		for _, v := range a.ops {
			fmt.Fprintln(w, "{")
			fmt.Fprintf(w, "name:\"%s\",\n", v.name)
			// flags
			if v.aux != "" {
				fmt.Fprintf(w, "auxType: aux%s,\n", v.aux)
			}
			fmt.Fprintf(w, "argLen: %d,\n", v.argLength)
			if v.rematerializeable {
				// A rematerializeable op is re-executed at its use sites,
				// so it must not have side effects on other registers.
				if v.reg.clobbers != 0 {
					log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
				}
				fmt.Fprintln(w, "rematerializeable: true,")
			}
			if v.commutative {
				fmt.Fprintln(w, "commutative: true,")
			}
			if a.name == "generic" {
				fmt.Fprintln(w, "generic:true,")
				fmt.Fprintln(w, "},") // close op
				// generic ops have no reg info or asm
				continue
			}
			if v.asm != "" {
				fmt.Fprintf(w, "asm: x86.A%s,\n", v.asm)
			}
			fmt.Fprintln(w, "reg:regInfo{")
			// Compute input allocation order. We allocate from the
			// most to the least constrained input. This order guarantees
			// that we will always be able to find a register.
			var s []intPair
			for i, r := range v.reg.inputs {
				if r != 0 {
					s = append(s, intPair{countRegs(r), i})
				}
			}
			if len(s) > 0 {
				sort.Sort(byKey(s))
				fmt.Fprintln(w, "inputs: []inputInfo{")
				for _, p := range s {
					r := v.reg.inputs[p.val]
					fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}
			if v.reg.clobbers > 0 {
				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
			}
			// reg outputs
			if len(v.reg.outputs) > 0 {
				fmt.Fprintln(w, "outputs: []regMask{")
				for _, r := range v.reg.outputs {
					fmt.Fprintf(w, "%d,%s\n", r, a.regMaskComment(r))
				}
				fmt.Fprintln(w, "},")
			}
			fmt.Fprintln(w, "},") // close reg info
			fmt.Fprintln(w, "},") // close op
		}
	}
	fmt.Fprintln(w, "}")
	fmt.Fprintln(w, "func (o Op) Asm() int {return opcodeTable[o].asm}")
	// generate op string method
	fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }")
	// gofmt result
	b := w.Bytes()
	var err error
	b, err = format.Source(b)
	if err != nil {
		// Dump the unformatted source so the syntax error can be located.
		fmt.Printf("%s\n", w.Bytes())
		panic(err)
	}
	err = ioutil.WriteFile("../opGen.go", b, 0666)
	if err != nil {
		log.Fatalf("can't write output: %v\n", err)
	}
	// Check that ../gc/ssa.go handles all the arch-specific opcodes.
	// This is very much a hack, but it is better than nothing.
	ssa, err := ioutil.ReadFile("../../gc/ssa.go")
	if err != nil {
		log.Fatalf("can't read ../../gc/ssa.go: %v", err)
	}
	for _, a := range archs {
		if a.name == "generic" {
			continue
		}
		for _, v := range a.ops {
			pattern := fmt.Sprintf("\\Wssa[.]Op%s%s\\W", a.name, v.name)
			match, err := regexp.Match(pattern, ssa)
			if err != nil {
				log.Fatalf("bad opcode regexp %s: %v", pattern, err)
			}
			if !match {
				log.Fatalf("Op%s%s has no code generation in ../../gc/ssa.go", a.name, v.name)
			}
		}
	}
}
// Name returns the name of the architecture for use in Op* and Block*
// enumerations. The generic architecture contributes no prefix.
func (a arch) Name() string {
	if a.name == "generic" {
		return ""
	}
	return a.name
}
// genLower writes the rewrite-rule file for every registered
// architecture.
func genLower() {
	for i := range archs {
		genRules(archs[i])
	}
}
// countRegs returns the number of set bits in the register mask.
func countRegs(r regMask) int {
	n := 0
	// Clear the lowest set bit each iteration (Kernighan's method).
	for ; r != 0; r &= r - 1 {
		n++
	}
	return n
}
// intPair pairs a sort key with an associated value; genOp uses it to
// order op inputs by how constrained they are.
type intPair struct {
	key, val int
}
// byKey sorts intPairs by increasing key.
type byKey []intPair

func (a byKey) Len() int           { return len(a) }
func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }

View File

@ -0,0 +1,630 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This program generates Go code that applies rewrite rules to a Value.
// The generated code implements a function of type func (v *Value) bool
// which returns true iff if did something.
// Ideas stolen from Swift: http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-2000-2.html
package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"go/format"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"sort"
"strings"
)
// rule syntax:
// sexpr [&& extra conditions] -> [@block] sexpr
//
// sexpr are s-expressions (lisp-like parenthesized groupings)
// sexpr ::= (opcode sexpr*)
// | variable
// | <type>
// | [auxint]
// | {aux}
//
// aux ::= variable | {code}
// type ::= variable | {code}
// variable ::= some token
// opcode ::= one of the opcodes from ../op.go (without the Op prefix)
// extra conditions is just a chunk of Go that evaluates to a boolean. It may use
// variables declared in the matching sexpr. The variable "v" is predefined to be
// the value matched by the entire rule.
// If multiple rules match, the first one in file order is selected.
// Command-line flags.
var (
	genLog = flag.Bool("log", false, "generate code that logs; for debugging only")
)
// Rule is one rewrite rule together with the line number in the .rules
// file it came from (used in generated comments and error messages).
type Rule struct {
	rule   string
	lineno int
}
// String returns a readable description of r for error messages.
func (r Rule) String() string {
	return fmt.Sprintf("rule %q at line %d", r.rule, r.lineno)
}
// parse returns the matching part of the rule, additional conditions,
// and the result. It aborts the program if the rule does not contain
// exactly one "->" arrow.
func (r Rule) parse() (match, cond, result string) {
	parts := strings.Split(r.rule, "->")
	if len(parts) != 2 {
		log.Fatalf("no arrow in %s", r)
	}
	match = strings.TrimSpace(parts[0])
	result = strings.TrimSpace(parts[1])
	// An optional "&&" separates the pattern from a Go boolean condition.
	if i := strings.Index(match, "&&"); i >= 0 {
		match, cond = strings.TrimSpace(match[:i]), strings.TrimSpace(match[i+2:])
	}
	return match, cond, result
}
// genRules reads arch.name+".rules" and writes ../rewrite<arch>.go,
// which contains one rewrite function per opcode plus a single
// rewriteBlock function handling all block kinds.
func genRules(arch arch) {
	// Open input file.
	text, err := os.Open(arch.name + ".rules")
	if err != nil {
		log.Fatalf("can't read rule file: %v", err)
	}
	// oprules contains a list of rules for each block and opcode
	blockrules := map[string][]Rule{}
	oprules := map[string][]Rule{}
	// read rule file
	scanner := bufio.NewScanner(text)
	rule := ""
	var lineno int
	for scanner.Scan() {
		lineno++
		line := scanner.Text()
		if i := strings.Index(line, "//"); i >= 0 {
			// Remove comments. Note that this isn't string safe, so
			// it will truncate lines with // inside strings. Oh well.
			line = line[:i]
		}
		rule += " " + line
		rule = strings.TrimSpace(rule)
		if rule == "" {
			continue
		}
		// A rule may span several lines; keep accumulating until it has
		// an arrow, a right-hand side, and balanced parentheses.
		if !strings.Contains(rule, "->") {
			continue
		}
		if strings.HasSuffix(rule, "->") {
			continue
		}
		if unbalanced(rule) {
			continue
		}
		op := strings.Split(rule, " ")[0][1:]
		if op[len(op)-1] == ')' {
			op = op[:len(op)-1] // rule has only opcode, e.g. (ConstNil) -> ...
		}
		if isBlock(op, arch) {
			blockrules[op] = append(blockrules[op], Rule{rule: rule, lineno: lineno})
		} else {
			oprules[op] = append(oprules[op], Rule{rule: rule, lineno: lineno})
		}
		rule = ""
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("scanner failed: %v\n", err)
	}
	if unbalanced(rule) {
		log.Fatalf("unbalanced rule at line %d: %v\n", lineno, rule)
	}
	// Order all the ops.
	var ops []string
	for op := range oprules {
		ops = append(ops, op)
	}
	sort.Strings(ops)
	// Start output buffer, write header.
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "// autogenerated from gen/%s.rules: do not edit!\n", arch.name)
	fmt.Fprintln(w, "// generated with: cd gen; go run *.go")
	fmt.Fprintln(w)
	fmt.Fprintln(w, "package ssa")
	if *genLog {
		fmt.Fprintln(w, "import \"fmt\"")
	}
	fmt.Fprintln(w, "import \"math\"")
	fmt.Fprintln(w, "var _ = math.MinInt8 // in case not otherwise used")
	// Main rewrite routine is a switch on v.Op.
	fmt.Fprintf(w, "func rewriteValue%s(v *Value, config *Config) bool {\n", arch.name)
	fmt.Fprintf(w, "switch v.Op {\n")
	for _, op := range ops {
		fmt.Fprintf(w, "case %s:\n", opName(op, arch))
		fmt.Fprintf(w, "return rewriteValue%s_%s(v, config)\n", arch.name, opName(op, arch))
	}
	fmt.Fprintf(w, "}\n")
	fmt.Fprintf(w, "return false\n")
	fmt.Fprintf(w, "}\n")
	// Generate a routine per op. Note that we don't make one giant routine
	// because it is too big for some compilers.
	for _, op := range ops {
		fmt.Fprintf(w, "func rewriteValue%s_%s(v *Value, config *Config) bool {\n", arch.name, opName(op, arch))
		fmt.Fprintln(w, "b := v.Block")
		fmt.Fprintln(w, "_ = b")
		for _, rule := range oprules[op] {
			match, cond, result := rule.parse()
			fmt.Fprintf(w, "// match: %s\n", match)
			fmt.Fprintf(w, "// cond: %s\n", cond)
			fmt.Fprintf(w, "// result: %s\n", result)
			fmt.Fprintf(w, "for {\n")
			genMatch(w, arch, match)
			if cond != "" {
				fmt.Fprintf(w, "if !(%s) {\nbreak\n}\n", cond)
			}
			genResult(w, arch, result)
			if *genLog {
				fmt.Fprintf(w, "fmt.Println(\"rewrite %s.rules:%d\")\n", arch.name, rule.lineno)
			}
			fmt.Fprintf(w, "return true\n")
			fmt.Fprintf(w, "}\n")
		}
		fmt.Fprintf(w, "return false\n")
		fmt.Fprintf(w, "}\n")
	}
	// Generate block rewrite function. There are only a few block types
	// so we can make this one function with a switch.
	fmt.Fprintf(w, "func rewriteBlock%s(b *Block) bool {\n", arch.name)
	fmt.Fprintf(w, "switch b.Kind {\n")
	ops = nil
	for op := range blockrules {
		ops = append(ops, op)
	}
	sort.Strings(ops)
	for _, op := range ops {
		fmt.Fprintf(w, "case %s:\n", blockName(op, arch))
		for _, rule := range blockrules[op] {
			match, cond, result := rule.parse()
			fmt.Fprintf(w, "// match: %s\n", match)
			fmt.Fprintf(w, "// cond: %s\n", cond)
			fmt.Fprintf(w, "// result: %s\n", result)
			fmt.Fprintf(w, "for {\n")
			s := split(match[1 : len(match)-1]) // remove parens, then split
			// check match of control value
			if s[1] != "nil" {
				fmt.Fprintf(w, "v := b.Control\n")
				genMatch0(w, arch, s[1], "v", map[string]string{}, false)
			}
			// assign successor names
			succs := s[2:]
			for i, a := range succs {
				if a != "_" {
					fmt.Fprintf(w, "%s := b.Succs[%d]\n", a, i)
				}
			}
			if cond != "" {
				fmt.Fprintf(w, "if !(%s) {\nbreak\n}\n", cond)
			}
			// Rule matches. Generate result.
			t := split(result[1 : len(result)-1]) // remove parens, then split
			newsuccs := t[2:]
			// Check if newsuccs is the same set as succs.
			m := map[string]bool{}
			for _, succ := range succs {
				if m[succ] {
					log.Fatalf("can't have a repeat successor name %s in %s", succ, rule)
				}
				m[succ] = true
			}
			for _, succ := range newsuccs {
				if !m[succ] {
					log.Fatalf("unknown successor %s in %s", succ, rule)
				}
				delete(m, succ)
			}
			if len(m) != 0 {
				log.Fatalf("unmatched successors %v in %s", m, rule)
			}
			// Modify predecessor lists for no-longer-reachable blocks
			for succ := range m {
				fmt.Fprintf(w, "b.Func.removePredecessor(b, %s)\n", succ)
			}
			fmt.Fprintf(w, "b.Kind = %s\n", blockName(t[0], arch))
			if t[1] == "nil" {
				fmt.Fprintf(w, "b.Control = nil\n")
			} else {
				fmt.Fprintf(w, "b.Control = %s\n", genResult0(w, arch, t[1], new(int), false, false))
			}
			if len(newsuccs) < len(succs) {
				fmt.Fprintf(w, "b.Succs = b.Succs[:%d]\n", len(newsuccs))
			}
			for i, a := range newsuccs {
				fmt.Fprintf(w, "b.Succs[%d] = %s\n", i, a)
			}
			// Update branch prediction
			switch {
			case len(newsuccs) != 2:
				fmt.Fprintln(w, "b.Likely = BranchUnknown")
			case newsuccs[0] == succs[0] && newsuccs[1] == succs[1]:
				// unchanged
			case newsuccs[0] == succs[1] && newsuccs[1] == succs[0]:
				// flipped
				fmt.Fprintln(w, "b.Likely *= -1")
			default:
				// unknown
				fmt.Fprintln(w, "b.Likely = BranchUnknown")
			}
			if *genLog {
				fmt.Fprintf(w, "fmt.Println(\"rewrite %s.rules:%d\")\n", arch.name, rule.lineno)
			}
			fmt.Fprintf(w, "return true\n")
			fmt.Fprintf(w, "}\n")
		}
	}
	fmt.Fprintf(w, "}\n")
	fmt.Fprintf(w, "return false\n")
	fmt.Fprintf(w, "}\n")
	// gofmt result
	b := w.Bytes()
	src, err := format.Source(b)
	if err != nil {
		// Dump the unformatted source so the syntax error can be located.
		fmt.Printf("%s\n", b)
		panic(err)
	}
	// Write to file
	err = ioutil.WriteFile("../rewrite"+arch.name+".go", src, 0666)
	if err != nil {
		log.Fatalf("can't write output: %v\n", err)
	}
}
// genMatch emits matching code for a top-level pattern, starting with
// an empty variable-binding table.
func genMatch(w io.Writer, arch arch, match string) {
	bindings := map[string]string{}
	genMatch0(w, arch, match, "v", bindings, true)
}
// genMatch0 emits code that checks whether the value held in the Go
// variable named v matches the pattern `match`. m maps pattern variable
// names to the Go expressions they are bound to; top is true only for
// the outermost pattern, whose opcode the caller's switch has already
// matched.
func genMatch0(w io.Writer, arch arch, match, v string, m map[string]string, top bool) {
	if match[0] != '(' {
		if _, ok := m[match]; ok {
			// variable already has a definition. Check whether
			// the old definition and the new definition match.
			// For example, (add x x). Equality is just pointer equality
			// on Values (so cse is important to do before lowering).
			fmt.Fprintf(w, "if %s != %s {\nbreak\n}\n", v, match)
			return
		}
		// remember that this variable references the given value
		if match == "_" {
			return
		}
		m[match] = v
		fmt.Fprintf(w, "%s := %s\n", match, v)
		return
	}
	// split body up into regions. Split by spaces/tabs, except those
	// contained in () or {}.
	s := split(match[1 : len(match)-1]) // remove parens, then split
	// check op
	if !top {
		fmt.Fprintf(w, "if %s.Op != %s {\nbreak\n}\n", v, opName(s[0], arch))
	}
	// check type/aux/args
	argnum := 0
	for _, a := range s[1:] {
		if a[0] == '<' {
			// type restriction
			t := a[1 : len(a)-1] // remove <>
			if !isVariable(t) {
				// code. We must match the results of this code.
				fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, t)
			} else {
				// variable
				if u, ok := m[t]; ok {
					// must match previous variable
					fmt.Fprintf(w, "if %s.Type != %s {\nbreak\n}\n", v, u)
				} else {
					m[t] = v + ".Type"
					fmt.Fprintf(w, "%s := %s.Type\n", t, v)
				}
			}
		} else if a[0] == '[' {
			// auxint restriction
			x := a[1 : len(a)-1] // remove []
			if !isVariable(x) {
				// code
				fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, x)
			} else {
				// variable
				if y, ok := m[x]; ok {
					fmt.Fprintf(w, "if %s.AuxInt != %s {\nbreak\n}\n", v, y)
				} else {
					m[x] = v + ".AuxInt"
					fmt.Fprintf(w, "%s := %s.AuxInt\n", x, v)
				}
			}
		} else if a[0] == '{' {
			// aux restriction
			x := a[1 : len(a)-1] // remove {}
			if !isVariable(x) {
				// code
				fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, x)
			} else {
				// variable
				if y, ok := m[x]; ok {
					fmt.Fprintf(w, "if %s.Aux != %s {\nbreak\n}\n", v, y)
				} else {
					m[x] = v + ".Aux"
					fmt.Fprintf(w, "%s := %s.Aux\n", x, v)
				}
			}
		} else {
			// variable or sexpr
			genMatch0(w, arch, a, fmt.Sprintf("%s.Args[%d]", v, argnum), m, false)
			argnum++
		}
	}
	// Ops declared with argLength == -1 take a variable number of
	// arguments, so an explicit length check is required; fixed-arity
	// ops are guaranteed the right count by their opcode.
	variableLength := false
	for _, op := range genericOps {
		if op.name == s[0] && op.argLength == -1 {
			variableLength = true
			break
		}
	}
	for _, op := range arch.ops {
		if op.name == s[0] && op.argLength == -1 {
			variableLength = true
			break
		}
	}
	if variableLength {
		fmt.Fprintf(w, "if len(%s.Args) != %d {\nbreak\n}\n", v, argnum)
	}
}
// genResult emits code that constructs the rule's result expression.
// A result of the form "@block expr" is built in the named block
// instead of the current one.
func genResult(w io.Writer, arch arch, result string) {
	move := false
	if result[0] == '@' {
		// parse @block directive
		directive := strings.SplitN(result[1:], " ", 2)
		fmt.Fprintf(w, "b = %s\n", directive[0])
		result, move = directive[1], true
	}
	genResult0(w, arch, result, new(int), true, move)
}
// genResult0 emits code that builds the result expression and returns
// the name of the Go variable holding the constructed value. alloc
// numbers the temporary variables; top marks the root of the result
// tree; move means the root is built in another block and the original
// value is rewritten into a copy of it.
func genResult0(w io.Writer, arch arch, result string, alloc *int, top, move bool) string {
	// TODO: when generating a constant result, use f.constVal to avoid
	// introducing copies just to clean them up again.
	if result[0] != '(' {
		// variable
		if top {
			// It in not safe in general to move a variable between blocks
			// (and particularly not a phi node).
			// Introduce a copy.
			fmt.Fprintf(w, "v.reset(OpCopy)\n")
			fmt.Fprintf(w, "v.Type = %s.Type\n", result)
			fmt.Fprintf(w, "v.AddArg(%s)\n", result)
		}
		return result
	}
	s := split(result[1 : len(result)-1]) // remove parens, then split
	// Find the type of the variable.
	var opType string
	var typeOverride bool
	for _, a := range s[1:] {
		if a[0] == '<' {
			// type restriction
			opType = a[1 : len(a)-1] // remove <>
			typeOverride = true
			break
		}
	}
	if opType == "" {
		// find default type, if any
		for _, op := range arch.ops {
			if op.name == s[0] && op.typ != "" {
				opType = typeName(op.typ)
				break
			}
		}
	}
	if opType == "" {
		// fall back to the generic op's default type
		for _, op := range genericOps {
			if op.name == s[0] && op.typ != "" {
				opType = typeName(op.typ)
				break
			}
		}
	}
	var v string
	if top && !move {
		// Reuse the matched value itself as the result root.
		v = "v"
		fmt.Fprintf(w, "v.reset(%s)\n", opName(s[0], arch))
		if typeOverride {
			fmt.Fprintf(w, "v.Type = %s\n", opType)
		}
	} else {
		if opType == "" {
			log.Fatalf("sub-expression %s (op=%s) must have a type", result, s[0])
		}
		v = fmt.Sprintf("v%d", *alloc)
		*alloc++
		fmt.Fprintf(w, "%s := b.NewValue0(v.Line, %s, %s)\n", v, opName(s[0], arch), opType)
		if move {
			// Rewrite original into a copy
			fmt.Fprintf(w, "v.reset(OpCopy)\n")
			fmt.Fprintf(w, "v.AddArg(%s)\n", v)
		}
	}
	for _, a := range s[1:] {
		if a[0] == '<' {
			// type restriction, handled above
		} else if a[0] == '[' {
			// auxint restriction
			x := a[1 : len(a)-1] // remove []
			fmt.Fprintf(w, "%s.AuxInt = %s\n", v, x)
		} else if a[0] == '{' {
			// aux restriction
			x := a[1 : len(a)-1] // remove {}
			fmt.Fprintf(w, "%s.Aux = %s\n", v, x)
		} else {
			// regular argument (sexpr or variable)
			x := genResult0(w, arch, a, alloc, false, move)
			fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x)
		}
	}
	return v
}
// split tokenizes s at top-level spaces/tabs, keeping text nested
// inside (), <>, [], or {} groupings together as one token. Tokens are
// trimmed of surrounding whitespace; runs of whitespace produce no
// empty tokens. It panics if a grouping is left unclosed at the end of
// the input.
func split(s string) []string {
	var tokens []string
scan:
	for s != "" {
		depth := 0
		var openMark, closeMark byte // delimiters of the current grouping
		sawToken := false            // a non-space character has been seen
		for i := 0; i < len(s); i++ {
			c := s[i]
			switch {
			case depth == 0 && c == '(':
				openMark, closeMark = '(', ')'
				depth = 1
			case depth == 0 && c == '<':
				openMark, closeMark = '<', '>'
				depth = 1
			case depth == 0 && c == '[':
				openMark, closeMark = '[', ']'
				depth = 1
			case depth == 0 && c == '{':
				openMark, closeMark = '{', '}'
				depth = 1
			case depth == 0 && (c == ' ' || c == '\t'):
				if sawToken {
					// Token complete; emit it and rescan the remainder.
					tokens = append(tokens, strings.TrimSpace(s[:i]))
					s = s[i:]
					continue scan
				}
			case depth > 0 && c == openMark:
				depth++
			case depth > 0 && c == closeMark:
				depth--
			default:
				sawToken = true
			}
		}
		if depth != 0 {
			panic("imbalanced expression: " + s)
		}
		if sawToken {
			tokens = append(tokens, strings.TrimSpace(s))
		}
		break
	}
	return tokens
}
// isBlock reports whether name is a block opcode, either generic or
// belonging to arch.
func isBlock(name string, arch arch) bool {
	for _, list := range [][]blockData{genericBlocks, arch.blocks} {
		for _, b := range list {
			if b.name == name {
				return true
			}
		}
	}
	return false
}
// opName converts from an op name specified in a rule file to an Op
// enum identifier: "Op"+name for generic ops, "Op"+archname+name for
// arch-specific ones.
func opName(name string, arch arch) string {
	prefix := "Op" + arch.name
	for _, op := range genericOps {
		if op.name == name {
			prefix = "Op"
			break
		}
	}
	return prefix + name
}
// blockName converts a block kind name to its Block* enum identifier,
// omitting the arch prefix for generic block kinds.
func blockName(name string, arch arch) string {
	prefix := "Block" + arch.name
	for _, b := range genericBlocks {
		if b.name == name {
			prefix = "Block"
			break
		}
	}
	return prefix + name
}
// typeName returns the Go expression used to name the given type in
// generated code. A few special types are package-level values named
// Type<typ>; everything else is obtained from the config's frontend.
func typeName(typ string) string {
	for _, special := range []string{"Flags", "Mem", "Void", "Int128"} {
		if typ == special {
			return "Type" + typ
		}
	}
	return "config.fe.Type" + typ + "()"
}
// unbalanced reports whether s contains differing numbers of '(' and
// ')' characters. (Equal counts in the wrong order still count as
// balanced; callers only use this as a cheap completeness check.)
func unbalanced(s string) bool {
	return strings.Count(s, "(") != strings.Count(s, ")")
}
// isVariableRE matches a single Go alphanumeric identifier.
// It is compiled once at package init: isVariable runs for every
// element of every rewrite rule, so recompiling the pattern on each
// call would be wasteful, and MustCompile also removes the impossible
// runtime error branch.
var isVariableRE = regexp.MustCompile(`^[A-Za-z_][A-Za-z_0-9]*$`)

// isVariable reports whether s is a single Go alphanumeric identifier.
func isVariable(s string) bool {
	return isVariableRE.MatchString(s)
}

View File

@ -0,0 +1,478 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"bytes"
"fmt"
"html"
"io"
"os"
)
// HTMLWriter writes an SSA function's progression through the compiler
// passes as a single HTML page, one table column per pass.
// All methods are safe to call on a nil receiver, making a disabled
// writer a no-op.
type HTMLWriter struct {
	Logger
	*os.File
}
// NewHTMLWriter creates (or truncates) the file at path and writes the
// page header for funcname. Errors are reported through logger.Fatalf.
func NewHTMLWriter(path string, logger Logger, funcname string) *HTMLWriter {
	out, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		logger.Fatalf(0, "%v", err)
	}
	html := HTMLWriter{File: out, Logger: logger}
	html.start(funcname)
	return &html
}
// start emits the page header: the CSS, the JavaScript click handlers
// that toggle highlighting/outlining of values and blocks, the help
// box, and the opening table row that per-pass columns are written
// into. No-op on a nil receiver.
func (w *HTMLWriter) start(name string) {
	if w == nil {
		return
	}
	w.WriteString("<html>")
	w.WriteString(`<head>
<style>
#helplink {
margin-bottom: 15px;
display: block;
margin-top: -15px;
}
#help {
display: none;
}
.stats {
font-size: 60%;
}
table {
border: 1px solid black;
table-layout: fixed;
width: 300px;
}
th, td {
border: 1px solid black;
overflow: hidden;
width: 400px;
vertical-align: top;
padding: 5px;
}
li {
list-style-type: none;
}
li.ssa-long-value {
text-indent: -2em; /* indent wrapped lines */
}
li.ssa-value-list {
display: inline;
}
li.ssa-start-block {
padding: 0;
margin: 0;
}
li.ssa-end-block {
padding: 0;
margin: 0;
}
ul.ssa-print-func {
padding-left: 0;
}
dl.ssa-gen {
padding-left: 0;
}
dt.ssa-prog-src {
padding: 0;
margin: 0;
float: left;
width: 4em;
}
dd.ssa-prog {
padding: 0;
margin-right: 0;
margin-left: 4em;
}
.dead-value {
color: gray;
}
.dead-block {
opacity: 0.5;
}
.depcycle {
font-style: italic;
}
.highlight-yellow { background-color: yellow; }
.highlight-aquamarine { background-color: aquamarine; }
.highlight-coral { background-color: coral; }
.highlight-lightpink { background-color: lightpink; }
.highlight-lightsteelblue { background-color: lightsteelblue; }
.highlight-palegreen { background-color: palegreen; }
.highlight-powderblue { background-color: powderblue; }
.highlight-lightgray { background-color: lightgray; }
.outline-blue { outline: blue solid 2px; }
.outline-red { outline: red solid 2px; }
.outline-blueviolet { outline: blueviolet solid 2px; }
.outline-darkolivegreen { outline: darkolivegreen solid 2px; }
.outline-fuchsia { outline: fuchsia solid 2px; }
.outline-sienna { outline: sienna solid 2px; }
.outline-gold { outline: gold solid 2px; }
</style>
<script type="text/javascript">
// ordered list of all available highlight colors
var highlights = [
"highlight-yellow",
"highlight-aquamarine",
"highlight-coral",
"highlight-lightpink",
"highlight-lightsteelblue",
"highlight-palegreen",
"highlight-lightgray"
];
// state: which value is highlighted this color?
var highlighted = {};
for (var i = 0; i < highlights.length; i++) {
highlighted[highlights[i]] = "";
}
// ordered list of all available outline colors
var outlines = [
"outline-blue",
"outline-red",
"outline-blueviolet",
"outline-darkolivegreen",
"outline-fuchsia",
"outline-sienna",
"outline-gold"
];
// state: which value is outlined this color?
var outlined = {};
for (var i = 0; i < outlines.length; i++) {
outlined[outlines[i]] = "";
}
window.onload = function() {
var ssaElemClicked = function(elem, event, selections, selected) {
event.stopPropagation()
// TODO: pushState with updated state and read it on page load,
// so that state can survive across reloads
// find all values with the same name
var c = elem.classList.item(0);
var x = document.getElementsByClassName(c);
// if selected, remove selections from all of them
// otherwise, attempt to add
var remove = "";
for (var i = 0; i < selections.length; i++) {
var color = selections[i];
if (selected[color] == c) {
remove = color;
break;
}
}
if (remove != "") {
for (var i = 0; i < x.length; i++) {
x[i].classList.remove(remove);
}
selected[remove] = "";
return;
}
// we're adding a selection
// find first available color
var avail = "";
for (var i = 0; i < selections.length; i++) {
var color = selections[i];
if (selected[color] == "") {
avail = color;
break;
}
}
if (avail == "") {
alert("out of selection colors; go add more");
return;
}
// set that as the selection
for (var i = 0; i < x.length; i++) {
x[i].classList.add(avail);
}
selected[avail] = c;
};
var ssaValueClicked = function(event) {
ssaElemClicked(this, event, highlights, highlighted);
}
var ssaBlockClicked = function(event) {
ssaElemClicked(this, event, outlines, outlined);
}
var ssavalues = document.getElementsByClassName("ssa-value");
for (var i = 0; i < ssavalues.length; i++) {
ssavalues[i].addEventListener('click', ssaValueClicked);
}
var ssalongvalues = document.getElementsByClassName("ssa-long-value");
for (var i = 0; i < ssalongvalues.length; i++) {
// don't attach listeners to li nodes, just the spans they contain
if (ssalongvalues[i].nodeName == "SPAN") {
ssalongvalues[i].addEventListener('click', ssaValueClicked);
}
}
var ssablocks = document.getElementsByClassName("ssa-block");
for (var i = 0; i < ssablocks.length; i++) {
ssablocks[i].addEventListener('click', ssaBlockClicked);
}
};
function toggle_visibility(id) {
var e = document.getElementById(id);
if(e.style.display == 'block')
e.style.display = 'none';
else
e.style.display = 'block';
}
</script>
</head>`)
	// TODO: Add javascript click handlers for blocks
	// to outline that block across all phases
	w.WriteString("<body>")
	w.WriteString("<h1>")
	w.WriteString(html.EscapeString(name))
	w.WriteString("</h1>")
	w.WriteString(`
<a href="#" onclick="toggle_visibility('help');" id="helplink">help</a>
<div id="help">
<p>
Click on a value or block to toggle highlighting of that value/block and its uses.
Values and blocks are highlighted by ID, which may vary across passes.
(TODO: Fix this.)
</p>
<p>
Faded out values and blocks are dead code that has not been eliminated.
</p>
<p>
Values printed in italics have a dependency cycle.
</p>
</div>
`)
	w.WriteString("<table>")
	w.WriteString("<tr>")
}
// Close writes the closing tags for the row/table/body/html elements
// opened by start and closes the underlying file. No-op on a nil
// receiver.
func (w *HTMLWriter) Close() {
	if w == nil {
		return
	}
	w.WriteString("</tr>")
	w.WriteString("</table>")
	w.WriteString("</body>")
	w.WriteString("</html>")
	w.File.Close()
}
// WriteFunc writes f in a column headed by title.
// No-op on a nil receiver.
func (w *HTMLWriter) WriteFunc(title string, f *Func) {
	if w == nil {
		return // avoid generating HTML just to discard it
	}
	w.WriteColumn(title, f.HTML())
	// TODO: Add visual representation of f's CFG.
}
// WriteColumn writes raw HTML in a column headed by title.
// It is intended for pre- and post-compilation log output.
// The html argument is written without escaping; callers must escape
// any untrusted text themselves. No-op on a nil receiver.
func (w *HTMLWriter) WriteColumn(title string, html string) {
	if w == nil {
		return
	}
	w.WriteString("<td>")
	w.WriteString("<h2>" + title + "</h2>")
	w.WriteString(html)
	w.WriteString("</td>")
}
// Printf formats to the underlying file; a write error is fatal via
// the embedded Logger.
func (w *HTMLWriter) Printf(msg string, v ...interface{}) {
	if _, err := fmt.Fprintf(w.File, msg, v...); err != nil {
		w.Fatalf(0, "%v", err)
	}
}
// WriteString writes s to the underlying file; a write error is fatal
// via the embedded Logger.
func (w *HTMLWriter) WriteString(s string) {
	if _, err := w.File.WriteString(s); err != nil {
		w.Fatalf(0, "%v", err)
	}
}
// HTML returns a short span for v, classed by its ID string so that
// clicking can highlight every occurrence of the same value.
func (v *Value) HTML() string {
	// TODO: Using the value ID as the class ignores the fact
	// that value IDs get recycled and that some values
	// are transmuted into other values.
	return fmt.Sprintf("<span class=\"%[1]s ssa-value\">%[1]s</span>", v.String())
}
// LongHTML renders v's full definition as a span: ID, opcode, type,
// auxint/aux (when set), arguments, and the allocated register once
// register allocation has recorded one.
func (v *Value) LongHTML() string {
	// TODO: Any intra-value formatting?
	// I'm wary of adding too much visual noise,
	// but a little bit might be valuable.
	// We already have visual noise in the form of punctuation
	// maybe we could replace some of that with formatting.
	s := fmt.Sprintf("<span class=\"%s ssa-long-value\">", v.String())
	s += fmt.Sprintf("%s = %s", v.HTML(), v.Op.String())
	s += " &lt;" + html.EscapeString(v.Type.String()) + "&gt;"
	if v.AuxInt != 0 {
		s += fmt.Sprintf(" [%d]", v.AuxInt)
	}
	if v.Aux != nil {
		if _, ok := v.Aux.(string); ok {
			// string aux values are shown quoted
			s += html.EscapeString(fmt.Sprintf(" {%q}", v.Aux))
		} else {
			s += html.EscapeString(fmt.Sprintf(" {%v}", v.Aux))
		}
	}
	for _, a := range v.Args {
		s += fmt.Sprintf(" %s", a.HTML())
	}
	// Show the register assigned by regalloc, if any.
	r := v.Block.Func.RegAlloc
	if int(v.ID) < len(r) && r[v.ID] != nil {
		s += " : " + r[v.ID].Name()
	}
	s += "</span>"
	return s
}
// HTML returns a short span for b, classed by its ID string so that
// clicking can outline every occurrence of the same block.
func (b *Block) HTML() string {
	// TODO: Using the value ID as the class ignores the fact
	// that value IDs get recycled and that some values
	// are transmuted into other values.
	return fmt.Sprintf("<span class=\"%[1]s ssa-block\">%[1]s</span>", html.EscapeString(b.String()))
}
// LongHTML renders b's full header: ID, kind, aux (when set), control
// value, successor list, and branch-likeliness annotation.
func (b *Block) LongHTML() string {
	// TODO: improve this for HTML?
	s := fmt.Sprintf("<span class=\"%s ssa-block\">%s</span>", html.EscapeString(b.String()), html.EscapeString(b.Kind.String()))
	if b.Aux != nil {
		s += html.EscapeString(fmt.Sprintf(" {%v}", b.Aux))
	}
	if b.Control != nil {
		s += fmt.Sprintf(" %s", b.Control.HTML())
	}
	if len(b.Succs) > 0 {
		s += " &#8594;" // right arrow
		for _, c := range b.Succs {
			s += " " + c.HTML()
		}
	}
	switch b.Likely {
	case BranchUnlikely:
		s += " (unlikely)"
	case BranchLikely:
		s += " (likely)"
	}
	return s
}
// HTML returns the whole function rendered as HTML, wrapped in a
// <code> element.
func (f *Func) HTML() string {
	var buf bytes.Buffer
	fmt.Fprint(&buf, "<code>")
	p := htmlFuncPrinter{w: &buf}
	fprintFunc(p, f)
	// fprintFunc(&buf, f) // TODO: HTML, not text, <br /> for line breaks, etc.
	fmt.Fprint(&buf, "</code>")
	return buf.String()
}
// htmlFuncPrinter is the funcPrinter used by Func.HTML; it emits HTML
// instead of plain text.
type htmlFuncPrinter struct {
	w io.Writer
}

// header is a no-op: the page already shows the function name.
func (p htmlFuncPrinter) header(f *Func) {}
// startBlock opens the list for b, prints the block header with its
// predecessor list, and opens the nested value list when b has values.
// Unreachable blocks are faded via the dead-block class.
func (p htmlFuncPrinter) startBlock(b *Block, reachable bool) {
	// TODO: Make blocks collapsable?
	var dead string
	if !reachable {
		dead = "dead-block"
	}
	fmt.Fprintf(p.w, "<ul class=\"%s ssa-print-func %s\">", b, dead)
	fmt.Fprintf(p.w, "<li class=\"ssa-start-block\">%s:", b.HTML())
	if len(b.Preds) > 0 {
		io.WriteString(p.w, " &#8592;") // left arrow
		for _, pred := range b.Preds {
			fmt.Fprintf(p.w, " %s", pred.HTML())
		}
	}
	io.WriteString(p.w, "</li>")
	if len(b.Values) > 0 { // start list of values
		io.WriteString(p.w, "<li class=\"ssa-value-list\">")
		io.WriteString(p.w, "<ul>")
	}
}
// endBlock closes the value list opened by startBlock (if any), prints
// b's full header line, and closes the block's list.
func (p htmlFuncPrinter) endBlock(b *Block) {
	if len(b.Values) > 0 { // end list of values
		io.WriteString(p.w, "</ul>")
		io.WriteString(p.w, "</li>")
	}
	io.WriteString(p.w, "<li class=\"ssa-end-block\">")
	fmt.Fprint(p.w, b.LongHTML())
	io.WriteString(p.w, "</li>")
	io.WriteString(p.w, "</ul>")
	// io.WriteString(p.w, "</span>")
}
// value prints one value as a list item, fading it via the dead-value
// class when it is not live.
func (p htmlFuncPrinter) value(v *Value, live bool) {
	var dead string
	if !live {
		dead = "dead-value"
	}
	fmt.Fprintf(p.w, "<li class=\"ssa-long-value %s\">", dead)
	fmt.Fprint(p.w, v.LongHTML())
	io.WriteString(p.w, "</li>")
}
// startDepCycle opens a span that italicizes values participating in a
// dependency cycle (see the .depcycle CSS class).
func (p htmlFuncPrinter) startDepCycle() {
	fmt.Fprintln(p.w, "<span class=\"depcycle\">")
}

// endDepCycle closes the span opened by startDepCycle.
func (p htmlFuncPrinter) endDepCycle() {
	fmt.Fprintln(p.w, "</span>")
}
// named is not yet implemented for HTML output: name-to-value mappings
// are silently omitted from the page.
func (p htmlFuncPrinter) named(n LocalSlot, vals []*Value) {
	// TODO
}

View File

@ -0,0 +1,28 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// ID is a compact integer identifier handed out by idAlloc.
type ID int32
// idAlloc provides an allocator for unique integers.
type idAlloc struct {
	last ID // most recently allocated ID; zero means none allocated yet
}
// get allocates a fresh, previously unissued ID and returns it.
// IDs start at 1 (the zero value of idAlloc has last == 0); it
// panics once the int32 ID space is exhausted.
func (a *idAlloc) get() ID {
	next := a.last + 1
	if next == 1<<31-1 {
		panic("too many ids for this function")
	}
	a.last = next
	return next
}
// num returns the maximum ID ever returned + 1, i.e. a size
// suitable for ID-indexed slices.
func (a *idAlloc) num() int {
	return int(a.last) + 1
}

View File

@ -0,0 +1,102 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// layout orders basic blocks in f with the goal of minimizing control flow instructions.
// After this phase returns, the order of f.Blocks matters and is the order
// in which those blocks will appear in the assembly output.
func layout(f *Func) {
	order := make([]*Block, 0, f.NumBlocks())
	scheduled := make([]bool, f.NumBlocks()) // by block ID
	idToBlock := make([]*Block, f.NumBlocks())
	indegree := make([]int, f.NumBlocks()) // count of not-yet-scheduled predecessors
	posdegree := f.newSparseSet(f.NumBlocks()) // blocks with positive remaining degree
	defer f.retSparseSet(posdegree)
	zerodegree := f.newSparseSet(f.NumBlocks()) // blocks with zero remaining degree
	defer f.retSparseSet(zerodegree)

	// Initialize indegree of each block
	for _, b := range f.Blocks {
		idToBlock[b.ID] = b
		indegree[b.ID] = len(b.Preds)
		if len(b.Preds) == 0 {
			zerodegree.add(b.ID)
		} else {
			posdegree.add(b.ID)
		}
	}

	bid := f.Entry.ID
blockloop:
	for {
		// add block to schedule
		b := idToBlock[bid]
		order = append(order, b)
		scheduled[bid] = true
		if len(order) == len(f.Blocks) {
			break
		}

		// A successor whose predecessors are now all scheduled
		// moves from posdegree to zerodegree.
		for _, c := range b.Succs {
			indegree[c.ID]--
			if indegree[c.ID] == 0 {
				posdegree.remove(c.ID)
				zerodegree.add(c.ID)
			}
		}

		// Pick the next block to schedule
		// Pick among the successor blocks that have not been scheduled yet.
		// Use likely direction if we have it.
		var likely *Block
		switch b.Likely {
		case BranchLikely:
			likely = b.Succs[0]
		case BranchUnlikely:
			likely = b.Succs[1]
		}
		if likely != nil && !scheduled[likely.ID] {
			bid = likely.ID
			continue
		}

		// Use degree for now.
		// bid == 0 doubles as "no candidate found": real block IDs
		// start at 1 (see idAlloc.get).
		bid = 0
		mindegree := f.NumBlocks()
		for _, c := range order[len(order)-1].Succs {
			if scheduled[c.ID] {
				continue
			}
			if indegree[c.ID] < mindegree {
				mindegree = indegree[c.ID]
				bid = c.ID
			}
		}
		if bid != 0 {
			continue
		}
		// TODO: improve this part
		// No successor of the previously scheduled block works.
		// Pick a zero-degree block if we can.
		for zerodegree.size() > 0 {
			cid := zerodegree.pop()
			if !scheduled[cid] {
				bid = cid
				continue blockloop
			}
		}
		// Still nothing, pick any block.
		for {
			cid := posdegree.pop()
			if !scheduled[cid] {
				bid = cid
				continue blockloop
			}
		}
		// Unreachable: the loop above only exits via continue blockloop.
		b.Fatalf("no block available for layout")
	}
	f.Blocks = order
}

View File

@ -0,0 +1,300 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"fmt"
)
// A loop represents one (reducible) loop in a Func's CFG,
// identified by its header block.
type loop struct {
	header *Block // The header node of this (reducible) loop
	outer  *loop  // loop containing this loop

	// Next two fields not currently used, but cheap to maintain,
	// and aid in computation of inner-ness and list of blocks.
	nBlocks int32 // Number of blocks in this loop but not within inner loops
	isInner bool  // True if never discovered to contain a loop
}
// outerinner records that loop outer contains loop inner.
// A previously recorded outer pointer is replaced only when its
// header is an (improper) dominator-tree ancestor of the new outer's
// header, i.e. when the new outer is nested at least as tightly.
func (sdom sparseTree) outerinner(outer, inner *loop) {
	prev := inner.outer
	if prev != nil && !sdom.isAncestorEq(prev.header, outer.header) {
		return
	}
	inner.outer = outer
	outer.isInner = false
}
// A loopnest records the loop structure discovered for a Func
// by loopnestfor.
type loopnest struct {
	f     *Func
	b2l   []*loop    // block ID -> innermost loop containing the block (nil if none)
	po    []*Block   // postorder of f's blocks
	sdom  sparseTree // dominator tree, for ancestor queries
	loops []*loop    // all loops found
}
// min8 returns the smaller of two int8 values.
func min8(a, b int8) int8 {
	if b < a {
		return b
	}
	return a
}
// max8 returns the larger of two int8 values.
func max8(a, b int8) int8 {
	if b > a {
		return b
	}
	return a
}
// Unlikeliness ranks for block outcomes, least to most unlikely.
// Only the rank order matters (see likelyadjust); blMin anchors
// indexing into bllikelies.
const (
	blDEFAULT = 0
	blMin     = blDEFAULT
	blCALL    = 1
	blRET     = 2
	blEXIT    = 3
)
var bllikelies [4]string = [4]string{"default", "call", "ret", "exit"}
// describePredictionAgrees returns a suffix for debug output stating
// whether prediction matches, contradicts, or newly fills in the
// prediction already recorded on b.
func describePredictionAgrees(b *Block, prediction BranchPrediction) string {
	switch {
	case prediction == b.Likely:
		return " (agrees with previous)"
	case b.Likely != BranchUnknown:
		return " (disagrees with previous, ignored)"
	}
	return ""
}
// describeBranchPrediction emits a debug line explaining that the
// outcome with rank `likely` was preferred over rank `not` (names
// taken from bllikelies), and whether that agrees with any prediction
// already recorded on b.
func describeBranchPrediction(f *Func, b *Block, likely, not int8, prediction BranchPrediction) {
	f.Config.Warnl(int(b.Line), "Branch prediction rule %s < %s%s",
		bllikelies[likely-blMin], bllikelies[not-blMin], describePredictionAgrees(b, prediction))
}
// likelyadjust fills in missing branch predictions for two-way blocks,
// preferring edges that stay inside a loop and edges that avoid paths
// which must call, return, or exit. Explicit existing predictions are
// never overwritten.
func likelyadjust(f *Func) {
	// The values assigned to certain and local only matter
	// in their rank order. 0 is default, more positive
	// is less likely. It's possible to assign a negative
	// unlikeliness (though not currently the case).
	certain := make([]int8, f.NumBlocks()) // In the long run, all outcomes are at least this bad. Mainly for Exit
	local := make([]int8, f.NumBlocks())   // for our immediate predecessors.

	nest := loopnestfor(f)
	po := nest.po
	b2l := nest.b2l

	// Walk in postorder so (non-backedge) successors are ranked before b.
	for _, b := range po {
		switch b.Kind {
		case BlockExit:
			// Very unlikely.
			local[b.ID] = blEXIT
			certain[b.ID] = blEXIT

			// Ret, it depends.
		case BlockRet, BlockRetJmp:
			local[b.ID] = blRET
			certain[b.ID] = blRET

			// Calls. TODO not all calls are equal, names give useful clues.
			// Any name-based heuristics are only relative to other calls,
			// and less influential than inferences from loop structure.
		case BlockCall:
			local[b.ID] = blCALL
			certain[b.ID] = max8(blCALL, certain[b.Succs[0].ID])

		default:
			if len(b.Succs) == 1 {
				certain[b.ID] = certain[b.Succs[0].ID]
			} else if len(b.Succs) == 2 {
				// If successor is an unvisited backedge, it's in loop and we don't care.
				// Its default unlikely is also zero which is consistent with favoring loop edges.
				// Notice that this can act like a "reset" on unlikeliness at loops; the
				// default "everything returns" unlikeliness is erased by min with the
				// backedge likeliness; however a loop with calls on every path will be
				// tagged with call cost. Net effect is that loop entry is favored.
				b0 := b.Succs[0].ID
				b1 := b.Succs[1].ID
				certain[b.ID] = min8(certain[b0], certain[b1])

				l := b2l[b.ID]
				l0 := b2l[b0]
				l1 := b2l[b1]

				prediction := b.Likely
				// Weak loop heuristic -- both source and at least one dest are in loops,
				// and there is a difference in the destinations.
				// TODO what is best arrangement for nested loops?
				if l != nil && l0 != l1 {
					noprediction := false
					switch {
					// prefer not to exit loops
					case l1 == nil:
						prediction = BranchLikely
					case l0 == nil:
						prediction = BranchUnlikely

					// prefer to stay in loop, not exit to outer.
					case l == l0:
						prediction = BranchLikely
					case l == l1:
						prediction = BranchUnlikely
					default:
						noprediction = true
					}
					if f.pass.debug > 0 && !noprediction {
						f.Config.Warnl(int(b.Line), "Branch prediction rule stay in loop%s",
							describePredictionAgrees(b, prediction))
					}
				} else {
					// Lacking loop structure, fall back on heuristics.
					// "certain" (whole-future cost) outranks "local"
					// (immediate successor kind).
					if certain[b1] > certain[b0] {
						prediction = BranchLikely
						if f.pass.debug > 0 {
							describeBranchPrediction(f, b, certain[b0], certain[b1], prediction)
						}
					} else if certain[b0] > certain[b1] {
						prediction = BranchUnlikely
						if f.pass.debug > 0 {
							describeBranchPrediction(f, b, certain[b1], certain[b0], prediction)
						}
					} else if local[b1] > local[b0] {
						prediction = BranchLikely
						if f.pass.debug > 0 {
							describeBranchPrediction(f, b, local[b0], local[b1], prediction)
						}
					} else if local[b0] > local[b1] {
						prediction = BranchUnlikely
						if f.pass.debug > 0 {
							describeBranchPrediction(f, b, local[b1], local[b0], prediction)
						}
					}
				}
				// Only fill in unknowns; a pre-existing prediction wins.
				if b.Likely != prediction {
					if b.Likely == BranchUnknown {
						b.Likely = prediction
					}
				}
			}
		}
		if f.pass.debug > 2 {
			f.Config.Warnl(int(b.Line), "BP: Block %s, local=%s, certain=%s", b, bllikelies[local[b.ID]-blMin], bllikelies[certain[b.ID]-blMin])
		}
	}
}
// String identifies the loop by its header block.
func (l *loop) String() string {
	return fmt.Sprintf("hdr:%s", l.header)
}
// LongString describes the loop by its header plus inner-ness and,
// when present, the header of its enclosing loop.
func (l *loop) LongString() string {
	inner, outer := "", ""
	if l.isInner {
		inner = ", INNER"
	}
	if l.outer != nil {
		outer = ", o=" + l.outer.header.String()
	}
	return fmt.Sprintf("hdr:%s%s%s", l.header, inner, outer)
}
// nearestOuterLoop returns the outer loop of loop most nearly
// containing block b; the header must dominate b. loop itself
// is assumed to not be that loop. For acceptable performance,
// we're relying on loop nests to not be terribly deep.
func (l *loop) nearestOuterLoop(sdom sparseTree, b *Block) *loop {
	var o *loop
	// Walk outward until a loop whose header dominates b, or nil.
	for o = l.outer; o != nil && !sdom.isAncestorEq(o.header, b); o = o.outer {
	}
	return o
}
// loopnestfor discovers the (reducible) loop nest of f: which loops
// exist, which loop most tightly contains each block, and the
// outer/inner relations between loops.
func loopnestfor(f *Func) *loopnest {
	po := postorder(f)
	dom := dominators(f)
	sdom := newSparseTree(f, dom)
	b2l := make([]*loop, f.NumBlocks())
	loops := make([]*loop, 0)

	// Reducible-loop-nest-finding.
	for _, b := range po {
		if f.pass.debug > 3 {
			fmt.Printf("loop finding (0) at %s\n", b)
		}

		var innermost *loop // innermost header reachable from this block

		// IF any successor s of b is in a loop headed by h
		// AND h dominates b
		// THEN b is in the loop headed by h.
		//
		// Choose the first/innermost such h.
		//
		// IF s itself dominates b, the s is a loop header;
		// and there may be more than one such s.
		// Since there's at most 2 successors, the inner/outer ordering
		// between them can be established with simple comparisons.
		for _, bb := range b.Succs {
			l := b2l[bb.ID]

			if sdom.isAncestorEq(bb, b) { // Found a loop header
				if l == nil {
					l = &loop{header: bb, isInner: true}
					loops = append(loops, l)
					b2l[bb.ID] = l
				}
			} else { // Perhaps a loop header is inherited.
				// is there any loop containing our successor whose
				// header dominates b?
				if l != nil && !sdom.isAncestorEq(l.header, b) {
					l = l.nearestOuterLoop(sdom, b)
				}
			}

			if l == nil || innermost == l {
				continue
			}

			if innermost == nil {
				innermost = l
				continue
			}

			// Both successors yielded loops: the one whose header is
			// deeper in the dominator tree is the inner loop.
			if sdom.isAncestor(innermost.header, l.header) {
				sdom.outerinner(innermost, l)
				innermost = l
			} else if sdom.isAncestor(l.header, innermost.header) {
				sdom.outerinner(l, innermost)
			}
		}

		if innermost != nil {
			b2l[b.ID] = innermost
			innermost.nBlocks++
		}
	}
	if f.pass.debug > 1 && len(loops) > 0 {
		fmt.Printf("Loops in %s:\n", f.Name)
		for _, l := range loops {
			fmt.Printf("%s, b=", l.LongString())
			for _, b := range f.Blocks {
				if b2l[b.ID] == l {
					fmt.Printf(" %s", b)
				}
			}
			fmt.Print("\n")
		}
		fmt.Printf("Nonloop blocks in %s:", f.Name)
		for _, b := range f.Blocks {
			if b2l[b.ID] == nil {
				fmt.Printf(" %s", b)
			}
		}
		fmt.Print("\n")
	}
	return &loopnest{f, b2l, po, sdom, loops}
}

View File

@ -0,0 +1,38 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "fmt"
// A place that an ssa variable can reside.
// Implemented by Register and LocalSlot below.
type Location interface {
	Name() string // name to use in assembly templates: %rax, 16(%rsp), ...
}
// A Register is a machine register, like %rax.
// They are numbered densely from 0 (for each architecture).
type Register struct {
	Num  int32  // dense register number, starting at 0 per architecture
	name string // printable name, reported by Name()
}
// Name returns the register's name for use in assembly output,
// implementing Location.
func (r *Register) Name() string {
	return r.name
}
// A LocalSlot is a location in the stack frame.
// It is (possibly a subpiece of) a PPARAM, PPARAMOUT, or PAUTO ONAME node.
type LocalSlot struct {
	N    GCNode // an ONAME *gc.Node representing a variable on the stack
	Type Type   // type of slot
	Off  int64  // offset of slot in N; a zero offset is omitted from Name()
}
// Name implements Location, rendering the slot as name[type] or,
// for a nonzero offset, name+off[type].
func (s LocalSlot) Name() string {
	if s.Off != 0 {
		return fmt.Sprintf("%s+%d[%s]", s.N, s.Off, s.Type)
	}
	return fmt.Sprintf("%s[%s]", s.N, s.Type)
}

View File

@ -0,0 +1,34 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// convert to machine-dependent ops
// lower rewrites f's generic ops into the target's ops by applying
// the Config's machine-specific rewrite rules.
func lower(f *Func) {
	// repeat rewrites until we find no more rewrites
	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue)
}
// checkLower checks for unlowered opcodes and fails if we find one.
func checkLower(f *Func) {
	// Needs to be a separate phase because it must run after both
	// lowering and a subsequent dead code elimination (because lowering
	// rules may leave dead generic ops behind).
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if !opcodeTable[v.Op].generic {
				continue // lowered
			}
			switch v.Op {
			case OpSP, OpSB, OpInitMem, OpArg, OpPhi, OpVarDef, OpVarKill, OpVarLive:
				continue // ok not to lower
			}
			// Report op plus result and argument types so the missing
			// lowering rule is easy to identify.
			s := "not lowered: " + v.Op.String() + " " + v.Type.SimpleString()
			for _, a := range v.Args {
				s += " " + a.Type.SimpleString()
			}
			f.Unimplementedf("%s", s)
		}
	}
}

View File

@ -0,0 +1,260 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// A copy of the code in ../gc/subr.go.
// We can't use it directly because it would generate
// an import cycle. TODO: move to a common support package.
// argument passing to/from
// smagic and umagic

// A magic holds the inputs and outputs of the magic-number
// computation that turns division by a constant into
// multiply+shift (Hacker's Delight, chapter 10).
type magic struct {
	W   int // input for both - width
	S   int // output for both - shift
	Bad int // output for both - unexpected failure

	// magic multiplier for signed literal divisors
	Sd int64 // input - literal divisor
	Sm int64 // output - multiplier

	// magic multiplier for unsigned literal divisors
	Ud uint64 // input - literal divisor
	Um uint64 // output - multiplier
	Ua int    // output - adder
}
// magic number for signed division
// see hacker's delight chapter 10
// smagic fills in m.Sm (multiplier) and m.S (shift) for signed
// division by m.Sd at width m.W, or sets m.Bad = 1 when the width is
// unsupported or the divisor (0, ±1, or the minimum value) has no
// magic number.
func smagic(m *magic) {
	var mask uint64

	m.Bad = 0
	switch m.W {
	default:
		m.Bad = 1
		return

	case 8:
		mask = 0xff

	case 16:
		mask = 0xffff

	case 32:
		mask = 0xffffffff

	case 64:
		mask = 0xffffffffffffffff
	}

	two31 := mask ^ (mask >> 1) // high (sign) bit for width W

	p := m.W - 1
	ad := uint64(m.Sd) // |Sd|
	if m.Sd < 0 {
		ad = -uint64(m.Sd)
	}

	// bad denominators
	if ad == 0 || ad == 1 || ad == two31 {
		m.Bad = 1
		return
	}

	t := two31
	ad &= mask

	anc := t - 1 - t%ad
	anc &= mask

	q1 := two31 / anc
	r1 := two31 - q1*anc
	q1 &= mask
	r1 &= mask

	q2 := two31 / ad
	r2 := two31 - q2*ad
	q2 &= mask
	r2 &= mask

	// Find the smallest p such that the quotient error vanishes;
	// q1/r1 and q2/r2 track 2^p divided by anc and ad respectively.
	var delta uint64
	for {
		p++
		q1 <<= 1
		r1 <<= 1
		q1 &= mask
		r1 &= mask
		if r1 >= anc {
			q1++
			r1 -= anc
			q1 &= mask
			r1 &= mask
		}

		q2 <<= 1
		r2 <<= 1
		q2 &= mask
		r2 &= mask
		if r2 >= ad {
			q2++
			r2 -= ad
			q2 &= mask
			r2 &= mask
		}

		delta = ad - r2
		delta &= mask
		if q1 < delta || (q1 == delta && r1 == 0) {
			continue
		}

		break
	}

	// Sign-extend the W-bit multiplier into an int64.
	m.Sm = int64(q2 + 1)
	if uint64(m.Sm)&two31 != 0 {
		m.Sm |= ^int64(mask)
	}

	m.S = p - m.W
}
// magic number for unsigned division
// see hacker's delight chapter 10
// umagic fills in m.Um (multiplier), m.S (shift), and m.Ua (whether
// an add-before-shift fixup is needed) for unsigned division by m.Ud
// at width m.W, or sets m.Bad = 1 for unsupported widths or
// divisors (0 or the high bit) with no magic number.
func umagic(m *magic) {
	var mask uint64

	m.Bad = 0
	m.Ua = 0

	switch m.W {
	default:
		m.Bad = 1
		return

	case 8:
		mask = 0xff

	case 16:
		mask = 0xffff

	case 32:
		mask = 0xffffffff

	case 64:
		mask = 0xffffffffffffffff
	}

	two31 := mask ^ (mask >> 1) // high bit for width W

	m.Ud &= mask
	if m.Ud == 0 || m.Ud == two31 {
		m.Bad = 1
		return
	}

	nc := mask - (-m.Ud&mask)%m.Ud
	p := m.W - 1

	q1 := two31 / nc
	r1 := two31 - q1*nc
	q1 &= mask
	r1 &= mask

	q2 := (two31 - 1) / m.Ud
	r2 := (two31 - 1) - q2*m.Ud
	q2 &= mask
	r2 &= mask

	// Find the smallest p such that the quotient error vanishes;
	// overflow of the candidate multiplier q2 sets the adder flag Ua.
	var delta uint64
	for {
		p++
		if r1 >= nc-r1 {
			q1 <<= 1
			q1++
			r1 <<= 1
			r1 -= nc
		} else {
			q1 <<= 1
			r1 <<= 1
		}

		q1 &= mask
		r1 &= mask
		if r2+1 >= m.Ud-r2 {
			if q2 >= two31-1 {
				m.Ua = 1
			}

			q2 <<= 1
			q2++
			r2 <<= 1
			r2++
			r2 -= m.Ud
		} else {
			if q2 >= two31 {
				m.Ua = 1
			}

			q2 <<= 1
			r2 <<= 1
			r2++
		}

		q2 &= mask
		r2 &= mask

		delta = m.Ud - 1 - r2
		delta &= mask

		if p < m.W+m.W {
			if q1 < delta || (q1 == delta && r1 == 0) {
				continue
			}
		}

		break
	}

	m.Um = q2 + 1
	m.S = p - m.W
}
// adaptors for use by rewrite rules

// smagic64ok reports whether 64-bit signed division by d can be
// strength-reduced via the magic-number technique.
func smagic64ok(d int64) bool {
	mag := magic{W: 64, Sd: d}
	smagic(&mag)
	return mag.Bad == 0
}
// smagic64m returns the signed magic multiplier for 64-bit division by d.
func smagic64m(d int64) int64 {
	mag := magic{W: 64, Sd: d}
	smagic(&mag)
	return mag.Sm
}
// smagic64s returns the shift amount paired with smagic64m(d).
func smagic64s(d int64) int64 {
	mag := magic{W: 64, Sd: d}
	smagic(&mag)
	return int64(mag.S)
}
// umagic64ok reports whether 64-bit unsigned division by d can be
// strength-reduced via the magic-number technique.
func umagic64ok(d int64) bool {
	mag := magic{W: 64, Ud: uint64(d)}
	umagic(&mag)
	return mag.Bad == 0
}
// umagic64m returns the unsigned magic multiplier for 64-bit division by d.
func umagic64m(d int64) int64 {
	mag := magic{W: 64, Ud: uint64(d)}
	umagic(&mag)
	return int64(mag.Um)
}
// umagic64s returns the shift amount paired with umagic64m(d).
func umagic64s(d int64) int64 {
	mag := magic{W: 64, Ud: uint64(d)}
	umagic(&mag)
	return int64(mag.S)
}
// umagic64a reports whether the umagic64m(d) sequence needs the
// add-before-shift fixup.
func umagic64a(d int64) bool {
	mag := magic{W: 64, Ud: uint64(d)}
	umagic(&mag)
	return mag.Ua != 0
}

View File

@ -0,0 +1,163 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// TODO: return value from newobject/newarray is non-nil.

// nilcheckelim eliminates unnecessary nil checks.
func nilcheckelim(f *Func) {
	// A nil check is redundant if the same nil check was successful in a
	// dominating block. The efficacy of this pass depends heavily on the
	// efficacy of the cse pass.
	idom := dominators(f)
	domTree := make([][]*Block, f.NumBlocks())

	// Create a block ID -> [dominees] mapping
	for _, b := range f.Blocks {
		if dom := idom[b.ID]; dom != nil {
			domTree[dom.ID] = append(domTree[dom.ID], b)
		}
	}

	// TODO: Eliminate more nil checks.
	// We can recursively remove any chain of fixed offset calculations,
	// i.e. struct fields and array elements, even with non-constant
	// indices: x is non-nil iff x.a.b[i].c is.

	type walkState int
	const (
		Work   walkState = iota // clear nil check if we should and traverse to dominees regardless
		RecPtr                  // record the pointer as being nil checked
		ClearPtr
	)

	type bp struct {
		block *Block // block, or nil in RecPtr/ClearPtr state
		ptr   *Value // if non-nil, ptr that is to be set/cleared in RecPtr/ClearPtr state
		op    walkState
	}

	work := make([]bp, 0, 256)
	work = append(work, bp{block: f.Entry})

	// map from value ID to bool indicating if value is known to be non-nil
	// in the current dominator path being walked. This slice is updated by
	// walkStates to maintain the known non-nil values.
	nonNilValues := make([]bool, f.NumValues())

	// make an initial pass identifying any non-nil values
	for _, b := range f.Blocks {
		// a value resulting from taking the address of a
		// value, or a value constructed from an offset of a
		// non-nil ptr (OpAddPtr) implies it is non-nil
		for _, v := range b.Values {
			if v.Op == OpAddr || v.Op == OpAddPtr {
				nonNilValues[v.ID] = true
			} else if v.Op == OpPhi {
				// phis whose arguments are all non-nil
				// are non-nil
				argsNonNil := true
				for _, a := range v.Args {
					if !nonNilValues[a.ID] {
						argsNonNil = false
					}
				}
				if argsNonNil {
					nonNilValues[v.ID] = true
				}
			}
		}
	}

	// perform a depth first walk of the dominee tree
	for len(work) > 0 {
		node := work[len(work)-1]
		work = work[:len(work)-1]

		switch node.op {
		case Work:
			checked := checkedptr(node.block) // ptr being checked for nil/non-nil
			nonnil := nonnilptr(node.block)   // ptr that is non-nil due to this blocks pred

			if checked != nil {
				// already have a nilcheck in the dominator path, or this block is a success
				// block for the same value it is checking
				if nonNilValues[checked.ID] || checked == nonnil {
					// Eliminate the nil check.
					// The deadcode pass will remove vestigial values,
					// and the fuse pass will join this block with its successor.

					// Logging in the style of the former compiler -- and omit line 1,
					// which is usually in generated code.
					if f.Config.Debug_checknil() && int(node.block.Control.Line) > 1 {
						f.Config.Warnl(int(node.block.Control.Line), "removed nil check")
					}

					switch node.block.Kind {
					case BlockIf:
						node.block.Kind = BlockFirst
						node.block.Control = nil
					case BlockCheck:
						node.block.Kind = BlockPlain
						node.block.Control = nil
					default:
						f.Fatalf("bad block kind in nilcheck %s", node.block.Kind)
					}
				}
			}

			if nonnil != nil && !nonNilValues[nonnil.ID] {
				// this is a new nilcheck so add a ClearPtr node to clear the
				// ptr from the map of nil checks once we traverse
				// back up the tree
				work = append(work, bp{op: ClearPtr, ptr: nonnil})
			}

			// add all dominated blocks to the work list
			for _, w := range domTree[node.block.ID] {
				work = append(work, bp{block: w})
			}

			// Stack (LIFO) order matters: RecPtr is pushed last so it
			// pops first, recording the fact before any dominee is
			// visited; the ClearPtr pushed above pops after all
			// dominees, undoing the fact on the way back up the tree.
			if nonnil != nil && !nonNilValues[nonnil.ID] {
				work = append(work, bp{op: RecPtr, ptr: nonnil})
			}
		case RecPtr:
			nonNilValues[node.ptr.ID] = true
			continue
		case ClearPtr:
			nonNilValues[node.ptr.ID] = false
			continue
		}
	}
}
// checkedptr returns the Value, if any, that b's control op tests
// for nil: the operand of a BlockCheck, or of a BlockIf conditioned
// on OpIsNonNil. It returns nil for any other block shape.
func checkedptr(b *Block) *Value {
	switch {
	case b.Kind == BlockCheck:
		return b.Control.Args[0]
	case b.Kind == BlockIf && b.Control.Op == OpIsNonNil:
		return b.Control.Args[0]
	}
	return nil
}
// nonnilptr returns the Value, if any, known to be non-nil because b
// is the sole successor on the success path of an OpIsNonNil branch,
// or the successor of a BlockCheck, and b has exactly one predecessor.
func nonnilptr(b *Block) *Value {
	if len(b.Preds) != 1 {
		return nil
	}
	pred := b.Preds[0]
	if pred.Kind == BlockCheck {
		return pred.Control.Args[0]
	}
	if pred.Kind == BlockIf && pred.Control.Op == OpIsNonNil && pred.Succs[0] == b {
		return pred.Control.Args[0]
	}
	return nil
}

View File

@ -0,0 +1,433 @@
package ssa
import (
"strconv"
"testing"
)
// Benchmarks at several depths, to observe the big-O behavior of
// nilcheckelim (see benchmarkNilCheckDeep).
func BenchmarkNilCheckDeep1(b *testing.B)     { benchmarkNilCheckDeep(b, 1) }
func BenchmarkNilCheckDeep10(b *testing.B)    { benchmarkNilCheckDeep(b, 10) }
func BenchmarkNilCheckDeep100(b *testing.B)   { benchmarkNilCheckDeep(b, 100) }
func BenchmarkNilCheckDeep1000(b *testing.B)  { benchmarkNilCheckDeep(b, 1000) }
func BenchmarkNilCheckDeep10000(b *testing.B) { benchmarkNilCheckDeep(b, 10000) }
// benchmarkNilCheckDeep is a stress test of nilcheckelim.
// It uses the worst possible input: A linear string of
// nil checks, none of which can be eliminated.
// Run with multiple depths to observe big-O behavior.
func benchmarkNilCheckDeep(b *testing.B, depth int) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing

	// Build entry -> b0 -> b1 -> ... -> b<depth> -> exit, where each
	// bi checks its own fresh pointer pi.
	var blocs []bloc
	blocs = append(blocs,
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto(blockn(0)),
		),
	)
	for i := 0; i < depth; i++ {
		blocs = append(blocs,
			Bloc(blockn(i),
				Valu(ptrn(i), OpAddr, ptrType, 0, nil, "sb"),
				Valu(booln(i), OpIsNonNil, TypeBool, 0, nil, ptrn(i)),
				If(booln(i), blockn(i+1), "exit"),
			),
		)
	}
	blocs = append(blocs,
		Bloc(blockn(depth), Goto("exit")),
		Bloc("exit", Exit("mem")),
	)

	c := NewConfig("amd64", DummyFrontend{b}, nil, true)
	fun := Fun(c, "entry", blocs...)

	CheckFunc(fun.f)
	b.SetBytes(int64(depth)) // helps for eyeballing linearity
	b.ResetTimer()
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		nilcheckelim(fun.f)
	}
}
func blockn(n int) string { return "b" + strconv.Itoa(n) }
func ptrn(n int) string { return "p" + strconv.Itoa(n) }
func booln(n int) string { return "c" + strconv.Itoa(n) }
// isNilCheck reports whether b is a conditional branch controlled
// by an OpIsNonNil value.
func isNilCheck(b *Block) bool {
	if b.Kind != BlockIf {
		return false
	}
	return b.Control.Op == OpIsNonNil
}
// TestNilcheckSimple verifies that a second repeated nilcheck is removed.
// secondCheck re-tests ptr1, which checkPtr (its dominator) already
// proved non-nil on this path.
func TestNilcheckSimple(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("bool1", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool1", "secondCheck", "exit")),
		Bloc("secondCheck",
			Valu("bool2", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool2", "extra", "exit")),
		Bloc("extra",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["secondCheck"] && isNilCheck(b) {
			t.Errorf("secondCheck was not eliminated")
		}
	}
}
// TestNilcheckDomOrder ensures that the nil check elimination isn't dependent
// on the order of the dominees. (Same CFG as TestNilcheckSimple, but
// "exit" is listed before "secondCheck".)
func TestNilcheckDomOrder(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("bool1", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool1", "secondCheck", "exit")),
		Bloc("exit",
			Exit("mem")),
		Bloc("secondCheck",
			Valu("bool2", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool2", "extra", "exit")),
		Bloc("extra",
			Goto("exit")))

	CheckFunc(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["secondCheck"] && isNilCheck(b) {
			t.Errorf("secondCheck was not eliminated")
		}
	}
}
// TestNilcheckAddr verifies that nilchecks of OpAddr constructed values are removed.
// (OpAddr values are marked non-nil in nilcheckelim's initial pass.)
func TestNilcheckAddr(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpAddr, ptrType, 0, nil, "sb"),
			Valu("bool1", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool1", "extra", "exit")),
		Bloc("extra",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["checkPtr"] && isNilCheck(b) {
			t.Errorf("checkPtr was not eliminated")
		}
	}
}
// TestNilcheckAddPtr verifies that nilchecks of OpAddPtr constructed values are removed.
// (Like OpAddr, OpAddPtr values are marked non-nil in the initial pass.)
func TestNilcheckAddPtr(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("off", OpConst64, TypeInt64, 20, nil),
			Valu("ptr1", OpAddPtr, ptrType, 0, nil, "sb", "off"),
			Valu("bool1", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool1", "extra", "exit")),
		Bloc("extra",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["checkPtr"] && isNilCheck(b) {
			t.Errorf("checkPtr was not eliminated")
		}
	}
}
// TestNilcheckPhi tests that nil checks of phis, for which all values are known to be
// non-nil are removed.
// Both phi arguments are OpAddr values, so the phi itself is non-nil.
func TestNilcheckPhi(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Valu("sp", OpSP, TypeInvalid, 0, nil),
			Valu("baddr", OpAddr, TypeBool, 0, "b", "sp"),
			Valu("bool1", OpLoad, TypeBool, 0, nil, "baddr", "mem"),
			If("bool1", "b1", "b2")),
		Bloc("b1",
			Valu("ptr1", OpAddr, ptrType, 0, nil, "sb"),
			Goto("checkPtr")),
		Bloc("b2",
			Valu("ptr2", OpAddr, ptrType, 0, nil, "sb"),
			Goto("checkPtr")),
		// both ptr1 and ptr2 are guaranteed non-nil here
		Bloc("checkPtr",
			Valu("phi", OpPhi, ptrType, 0, nil, "ptr1", "ptr2"),
			Valu("bool2", OpIsNonNil, TypeBool, 0, nil, "phi"),
			If("bool2", "extra", "exit")),
		Bloc("extra",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["checkPtr"] && isNilCheck(b) {
			t.Errorf("checkPtr was not eliminated")
		}
	}
}
// TestNilcheckKeepRemove verifies that duplicate checks of the same pointer
// are removed, but checks of different pointers are not.
func TestNilcheckKeepRemove(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("bool1", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool1", "differentCheck", "exit")),
		// differentCheck tests a different pointer: must be kept.
		Bloc("differentCheck",
			Valu("ptr2", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("bool2", OpIsNonNil, TypeBool, 0, nil, "ptr2"),
			If("bool2", "secondCheck", "exit")),
		// secondCheck re-tests ptr1: must be removed.
		Bloc("secondCheck",
			Valu("bool3", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool3", "extra", "exit")),
		Bloc("extra",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	foundDifferentCheck := false
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["secondCheck"] && isNilCheck(b) {
			t.Errorf("secondCheck was not eliminated")
		}
		if b == fun.blocks["differentCheck"] && isNilCheck(b) {
			foundDifferentCheck = true
		}
	}
	if !foundDifferentCheck {
		t.Errorf("removed differentCheck, but shouldn't have")
	}
}
// TestNilcheckInFalseBranch tests that nil checks in the false branch of an nilcheck
// block are *not* removed.
// On the false (possibly-nil) edge, nothing is known about ptr1, so
// every later check must survive.
func TestNilcheckInFalseBranch(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("bool1", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool1", "extra", "secondCheck")),
		Bloc("secondCheck",
			Valu("bool2", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool2", "extra", "thirdCheck")),
		Bloc("thirdCheck",
			Valu("bool3", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool3", "extra", "exit")),
		Bloc("extra",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	foundSecondCheck := false
	foundThirdCheck := false
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["secondCheck"] && isNilCheck(b) {
			foundSecondCheck = true
		}
		if b == fun.blocks["thirdCheck"] && isNilCheck(b) {
			foundThirdCheck = true
		}
	}
	if !foundSecondCheck {
		t.Errorf("removed secondCheck, but shouldn't have [false branch]")
	}
	if !foundThirdCheck {
		t.Errorf("removed thirdCheck, but shouldn't have [false branch]")
	}
}
// TestNilcheckUser verifies that a user nil check that dominates a generated nil check
// will remove the generated nil check.
// The user's `ptr1 != nil` comparison is rewritten by opt into a form
// nilcheckelim recognizes.
func TestNilcheckUser(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("nilptr", OpConstNil, ptrType, 0, nil),
			Valu("bool1", OpNeqPtr, TypeBool, 0, nil, "ptr1", "nilptr"),
			If("bool1", "secondCheck", "exit")),
		Bloc("secondCheck",
			Valu("bool2", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool2", "extra", "exit")),
		Bloc("extra",
			Goto("exit")),
		Bloc("exit",
			Exit("mem")))

	CheckFunc(fun.f)
	// we need the opt here to rewrite the user nilcheck
	opt(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["secondCheck"] && isNilCheck(b) {
			t.Errorf("secondCheck was not eliminated")
		}
	}
}
// TestNilcheckBug reproduces a bug in nilcheckelim found by compiling math/big.
// secondCheck is reachable both from the non-nil edge and from
// couldBeNil, so the check must NOT be eliminated.
func TestNilcheckBug(t *testing.T) {
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
	c := NewConfig("amd64", DummyFrontend{t}, nil, true)
	fun := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto("checkPtr")),
		Bloc("checkPtr",
			Valu("ptr1", OpLoad, ptrType, 0, nil, "sb", "mem"),
			Valu("nilptr", OpConstNil, ptrType, 0, nil),
			Valu("bool1", OpNeqPtr, TypeBool, 0, nil, "ptr1", "nilptr"),
			If("bool1", "secondCheck", "couldBeNil")),
		Bloc("couldBeNil",
			Goto("secondCheck")),
		Bloc("secondCheck",
			Valu("bool2", OpIsNonNil, TypeBool, 0, nil, "ptr1"),
			If("bool2", "extra", "exit")),
		Bloc("extra",
			// prevent fuse from eliminating this block
			Valu("store", OpStore, TypeMem, 8, nil, "ptr1", "nilptr", "mem"),
			Goto("exit")),
		Bloc("exit",
			Valu("phi", OpPhi, TypeMem, 0, nil, "mem", "store"),
			Exit("mem")))

	CheckFunc(fun.f)
	// we need the opt here to rewrite the user nilcheck
	opt(fun.f)
	nilcheckelim(fun.f)

	// clean up the removed nil check
	fuse(fun.f)
	deadcode(fun.f)

	CheckFunc(fun.f)
	foundSecondCheck := false
	for _, b := range fun.f.Blocks {
		if b == fun.blocks["secondCheck"] && isNilCheck(b) {
			foundSecondCheck = true
		}
	}
	if !foundSecondCheck {
		t.Errorf("secondCheck was eliminated, but shouldn't have")
	}
}

View File

@ -0,0 +1,118 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "fmt"
// An Op encodes the specific operation that a Value performs.
// Opcodes' semantics can be modified by the type and aux fields of the Value.
// For instance, OpAdd can be 32 or 64 bit, signed or unsigned, float or complex, depending on Value.Type.
// Semantics of each op are described in the opcode files in gen/*Ops.go.
// There is one file for generic (architecture-independent) ops and one file
// for each architecture.
// Static per-opcode metadata (name, register constraints, aux conventions)
// is kept in opInfo, below.
type Op int32
// opInfo holds the static properties of an opcode: its printable name,
// assembler opcode, register constraints, and aux/argument conventions.
type opInfo struct {
	name              string
	asm               int
	reg               regInfo
	auxType           auxType
	argLen            int32 // the number of arguments, -1 if variable length
	generic           bool  // this is a generic (arch-independent) opcode
	rematerializeable bool  // this op is rematerializeable
	commutative       bool  // this operation is commutative (e.g. addition)
}
// inputInfo describes the register constraint on one input of an instruction.
type inputInfo struct {
	idx  int     // index in Args array
	regs regMask // allowed input registers
}
// regInfo describes the register requirements of an opcode: where its
// inputs may live, which registers it clobbers, and where outputs go.
type regInfo struct {
	inputs   []inputInfo // ordered in register allocation order
	clobbers regMask
	outputs  []regMask // NOTE: values can only have 1 output for now.
}
// auxType describes how the aux and auxInt fields of a Value are interpreted
// for a given opcode.
type auxType int8

const (
	auxNone         auxType = iota
	auxBool         // auxInt is 0/1 for false/true
	auxInt8         // auxInt is an 8-bit integer
	auxInt16        // auxInt is a 16-bit integer
	auxInt32        // auxInt is a 32-bit integer
	auxInt64        // auxInt is a 64-bit integer
	auxFloat        // auxInt is a float64 (encoded with math.Float64bits)
	auxString       // aux is a string
	auxSym          // aux is a symbol
	auxSymOff       // aux is a symbol, auxInt is an offset
	auxSymValAndOff // aux is a symbol, auxInt is a ValAndOff
)
// A ValAndOff is used by the several opcodes. It holds
// both a value and a pointer offset.
// A ValAndOff is intended to be encoded into an AuxInt field.
// The zero ValAndOff encodes a value of 0 and an offset of 0.
// The high 32 bits hold a value.
// The low 32 bits hold a pointer offset.
type ValAndOff int64

// Val extracts the value stored in the high 32 bits.
func (v ValAndOff) Val() int64 {
	return int64(v >> 32)
}

// Off extracts the sign-extended offset stored in the low 32 bits.
func (v ValAndOff) Off() int64 {
	return int64(int32(v))
}

// Int64 returns the raw encoding, suitable for storing in an AuxInt field.
func (v ValAndOff) Int64() int64 {
	return int64(v)
}

// String renders both components, for debugging output.
func (v ValAndOff) String() string {
	return fmt.Sprintf("val=%d,off=%d", v.Val(), v.Off())
}
// validVal reports whether the value can be used as an argument
// to makeValAndOff, i.e. whether it fits in 32 bits.
func validVal(val int64) bool {
	return int64(int32(val)) == val
}
// validOff reports whether the offset can be used as an argument
// to makeValAndOff, i.e. whether it fits in 32 bits.
func validOff(off int64) bool {
	return int64(int32(off)) == off
}
// validValAndOff reports whether we can fit the value and offset into
// a ValAndOff value: both components must fit in 32 bits.
func validValAndOff(val, off int64) bool {
	return val == int64(int32(val)) && off == int64(int32(off))
}
// makeValAndOff encodes a ValAndOff into an int64 suitable for storing in an AuxInt field.
// It panics if either component does not fit in 32 bits.
func makeValAndOff(val, off int64) int64 {
	if val != int64(int32(val)) || off != int64(int32(off)) {
		panic("invalid makeValAndOff")
	}
	// val in the high 32 bits, off (truncated to 32 bits) in the low 32.
	return val<<32 + int64(uint32(off))
}
// canAdd reports whether off can be added to v's offset
// without overflowing the 32-bit offset field.
func (v ValAndOff) canAdd(off int64) bool {
	sum := v.Off() + off
	return sum == int64(int32(sum))
}
// add returns the encoding of v with off added to its offset.
// It panics if the result overflows; callers must check canAdd first.
func (v ValAndOff) add(off int64) int64 {
	if !v.canAdd(off) {
		panic("invalid ValAndOff.add")
	}
	return makeValAndOff(v.Val(), v.Off()+off)
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// opt performs machine-independent optimization: it applies the
// generic (architecture-independent) rewrite rules to every block
// and value in f.
func opt(f *Func) {
	applyRewrite(f, rewriteBlockgeneric, rewriteValuegeneric)
}

View File

@ -0,0 +1,101 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"fmt"
"testing"
)
const (
	blockCount = 1000  // function size used by the benchFnPass benchmarks
	passCount  = 15000 // pass iterations used by the benchFnBlock benchmarks
)

// passFunc is the signature shared by the SSA passes benchmarked in this file.
type passFunc func(*Func)
// Per-pass benchmarks. The *Pass variants run a pass b.N times over a
// fixed-size function (benchFnPass); the *PassBlock variants run a pass a
// fixed number of times over a function with b.N blocks (benchFnBlock).
func BenchmarkDSEPass(b *testing.B)           { benchFnPass(b, dse, blockCount, genFunction) }
func BenchmarkDSEPassBlock(b *testing.B)      { benchFnBlock(b, dse, genFunction) }
func BenchmarkCSEPass(b *testing.B)           { benchFnPass(b, cse, blockCount, genFunction) }
func BenchmarkCSEPassBlock(b *testing.B)      { benchFnBlock(b, cse, genFunction) }
func BenchmarkDeadcodePass(b *testing.B)      { benchFnPass(b, deadcode, blockCount, genFunction) }
func BenchmarkDeadcodePassBlock(b *testing.B) { benchFnBlock(b, deadcode, genFunction) }
// multi applies the cse, dse, and deadcode passes in sequence,
// for benchmarking a combined mini-pipeline.
func multi(f *Func) {
	for _, pass := range []func(*Func){cse, dse, deadcode} {
		pass(f)
	}
}
// Benchmarks of the combined cse+dse+deadcode pipeline (see multi).
func BenchmarkMultiPass(b *testing.B)      { benchFnPass(b, multi, blockCount, genFunction) }
func BenchmarkMultiPassBlock(b *testing.B) { benchFnBlock(b, multi, genFunction) }
// benchFnPass runs passFunc b.N times across a single function of the
// given size, built by bg. The consistency check after each run is
// excluded from the measured time.
func benchFnPass(b *testing.B, fn passFunc, size int, bg blockGen) {
	b.ReportAllocs()
	c := NewConfig("amd64", DummyFrontend{b}, nil, true)
	fun := Fun(c, "entry", bg(size)...)

	CheckFunc(fun.f)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		fn(fun.f)
		// Verify the pass left the function well-formed, off the clock.
		b.StopTimer()
		CheckFunc(fun.f)
		b.StartTimer()
	}
}
// benchFnBlock runs passFunc a fixed number of times (passCount) across a
// function with b.N blocks, built by bg.
func benchFnBlock(b *testing.B, fn passFunc, bg blockGen) {
	b.ReportAllocs()
	c := NewConfig("amd64", DummyFrontend{b}, nil, true)
	fun := Fun(c, "entry", bg(b.N)...)

	CheckFunc(fun.f)
	b.ResetTimer()
	for i := 0; i < passCount; i++ {
		fn(fun.f)
	}
	b.StopTimer()
}
// genFunction returns the blocks of a straight-line function with the given
// number of body blocks. Each body block zeroes one address and performs four
// stores (two of them to the same address, so one is dead), giving the
// dse/cse/deadcode benchmarks realistic work.
func genFunction(size int) []bloc {
	var blocs []bloc
	elemType := &TypeImpl{Size_: 8, Name: "testtype"}
	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr", Elem_: elemType} // dummy for testing

	// valn builds a value name unique per (kind, block, slot).
	valn := func(s string, m, n int) string { return fmt.Sprintf("%s%d-%d", s, m, n) }
	blocs = append(blocs,
		Bloc("entry",
			Valu(valn("store", 0, 4), OpInitMem, TypeMem, 0, nil),
			Valu("sb", OpSB, TypeInvalid, 0, nil),
			Goto(blockn(1)),
		),
	)
	for i := 1; i < size+1; i++ {
		blocs = append(blocs, Bloc(blockn(i),
			Valu(valn("v", i, 0), OpConstBool, TypeBool, 1, nil),
			Valu(valn("addr", i, 1), OpAddr, ptrType, 0, nil, "sb"),
			Valu(valn("addr", i, 2), OpAddr, ptrType, 0, nil, "sb"),
			Valu(valn("addr", i, 3), OpAddr, ptrType, 0, nil, "sb"),
			Valu(valn("zero", i, 1), OpZero, TypeMem, 8, nil, valn("addr", i, 3),
				valn("store", i-1, 4)),
			Valu(valn("store", i, 1), OpStore, TypeMem, 0, nil, valn("addr", i, 1),
				valn("v", i, 0), valn("zero", i, 1)),
			Valu(valn("store", i, 2), OpStore, TypeMem, 0, nil, valn("addr", i, 2),
				valn("v", i, 0), valn("store", i, 1)),
			Valu(valn("store", i, 3), OpStore, TypeMem, 0, nil, valn("addr", i, 1),
				valn("v", i, 0), valn("store", i, 2)),
			Valu(valn("store", i, 4), OpStore, TypeMem, 0, nil, valn("addr", i, 3),
				valn("v", i, 0), valn("store", i, 3)),
			Goto(blockn(i+1))))
	}

	blocs = append(blocs,
		Bloc(blockn(size+1), Goto("exit")),
		Bloc("exit", Exit("store0-4")),
	)

	return blocs
}

View File

@ -0,0 +1,68 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// phielim eliminates redundant phi values from f.
// A phi is redundant if its arguments are all equal. For
// purposes of counting, ignore the phi itself. Both of
// these phis are redundant:
//   v = phi(x,x,x)
//   v = phi(x,v,x,v)
// We repeat this process to also catch situations like:
//   v = phi(x, phi(x, x), phi(x, v))
// TODO: Can we also simplify cases like:
//   v = phi(v, w, x)
//   w = phi(v, w, x)
// and would that be useful?
func phielim(f *Func) {
	for changed := true; changed; {
		changed = false
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				copyelimValue(v)
				if phielimValue(v) {
					changed = true
				}
			}
		}
	}
}
// phielimValue rewrites v in place into a copy if it is a redundant phi,
// reporting whether it changed v. A phi is redundant if, ignoring
// self-references and arguments arriving along never-taken edges, all of
// its remaining arguments are the same value.
func phielimValue(v *Value) bool {
	if v.Op != OpPhi {
		return false
	}

	// If there are two distinct args of v which
	// are not v itself, then the phi must remain.
	// Otherwise, we can replace it with a copy.
	var w *Value
	for i, x := range v.Args {
		if b := v.Block.Preds[i]; b.Kind == BlockFirst && b.Succs[1] == v.Block {
			// This branch is never taken so we can just eliminate it.
			continue
		}
		if x == v {
			continue
		}
		if x == w {
			continue
		}
		if w != nil {
			return false
		}
		w = x
	}

	if w == nil {
		// v references only itself. It must be in
		// a dead code loop. Don't bother modifying it.
		return false
	}
	v.Op = OpCopy
	v.SetArgs1(w)
	return true
}

View File

@ -0,0 +1,86 @@
package ssa
// phiopt eliminates boolean Phis based on the previous if.
//
// Main use case is to transform:
//   x := false
//   if b {
//     x = true
//   }
// into x = b.
//
// In SSA code this appears as
//
// b0
//   If b -> b1 b2
// b1
//   Plain -> b2
// b2
//   x = (OpPhi (ConstBool [true]) (ConstBool [false]))
//
// In this case we can replace x with a copy of b.
func phiopt(f *Func) {
	for _, b := range f.Blocks {
		if len(b.Preds) != 2 || len(b.Values) == 0 {
			continue
		}

		// Walk up single-predecessor chains from each predecessor of b,
		// looking for a common dominating If block.
		pb0, b0 := b, b.Preds[0]
		for b0.Kind != BlockIf && len(b0.Preds) == 1 {
			pb0, b0 = b0, b0.Preds[0]
		}
		if b0.Kind != BlockIf {
			continue
		}
		pb1, b1 := b, b.Preds[1]
		for b1.Kind != BlockIf && len(b1.Preds) == 1 {
			pb1, b1 = b1, b1.Preds[0]
		}
		if b1 != b0 {
			continue
		}
		// b0 is the if block giving the boolean value.

		// reverse records whether b's first predecessor chain hangs off
		// b0's false successor instead of its true successor.
		var reverse bool
		if b0.Succs[0] == pb0 && b0.Succs[1] == pb1 {
			reverse = false
		} else if b0.Succs[0] == pb1 && b0.Succs[1] == pb0 {
			reverse = true
		} else {
			b.Fatalf("invalid predecessors\n")
		}

		for _, v := range b.Values {
			if v.Op != OpPhi || !v.Type.IsBoolean() || v.Args[0].Op != OpConstBool || v.Args[1].Op != OpConstBool {
				continue
			}

			// isCopy: the phi equals the branch condition; otherwise it
			// equals its negation.
			ok, isCopy := false, false
			if v.Args[0].AuxInt == 1 && v.Args[1].AuxInt == 0 {
				ok, isCopy = true, !reverse
			} else if v.Args[0].AuxInt == 0 && v.Args[1].AuxInt == 1 {
				ok, isCopy = true, reverse
			}

			// (Phi (ConstBool [x]) (ConstBool [x])) is already handled by opt / phielim.

			if ok && isCopy {
				if f.pass.debug > 0 {
					f.Config.Warnl(int(b.Line), "converted OpPhi to OpCopy")
				}
				v.reset(OpCopy)
				v.AddArg(b0.Control)
				continue
			}
			if ok && !isCopy {
				if f.pass.debug > 0 {
					f.Config.Warnl(int(b.Line), "converted OpPhi to OpNot")
				}
				v.reset(OpNot)
				v.AddArg(b0.Control)
				continue
			}
		}
	}
}

View File

@ -0,0 +1,149 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"bytes"
"fmt"
"io"
)
// printFunc prints f's textual representation to the function's logger.
func printFunc(f *Func) {
	f.Logf("%s", f)
}
// String returns a multi-line textual representation of f for debugging.
func (f *Func) String() string {
	var b bytes.Buffer
	fprintFunc(stringFuncPrinter{w: &b}, f)
	return b.String()
}
// funcPrinter is the output interface driven by fprintFunc: implementations
// receive structural events (header, blocks, values, dependency cycles,
// named slots) as the function is walked.
type funcPrinter interface {
	header(f *Func)
	startBlock(b *Block, reachable bool)
	endBlock(b *Block)
	value(v *Value, live bool)
	startDepCycle()
	endDepCycle()
	named(n LocalSlot, vals []*Value)
}
// stringFuncPrinter implements funcPrinter, rendering a function as plain
// text to w.
type stringFuncPrinter struct {
	w io.Writer
}
// header prints the function's name and type on one line.
func (p stringFuncPrinter) header(f *Func) {
	fmt.Fprintf(p.w, "%s %v\n", f.Name, f.Type)
}
// startBlock prints the block's id and predecessor list; blocks the
// liveness analysis found unreachable are tagged DEAD.
func (p stringFuncPrinter) startBlock(b *Block, reachable bool) {
	fmt.Fprintf(p.w, "  b%d:", b.ID)
	for i, pred := range b.Preds {
		if i == 0 {
			io.WriteString(p.w, " <-")
		}
		fmt.Fprintf(p.w, " b%d", pred.ID)
	}
	if !reachable {
		io.WriteString(p.w, " DEAD")
	}
	io.WriteString(p.w, "\n")
}
// endBlock prints the block's terminator (its long form) on an indented line.
func (p stringFuncPrinter) endBlock(b *Block) {
	fmt.Fprintf(p.w, "    %s\n", b.LongString())
}
// value prints one value on its own indented line; values the liveness
// analysis found dead are tagged DEAD.
func (p stringFuncPrinter) value(v *Value, live bool) {
	line := "    " + v.LongString()
	if !live {
		line += " DEAD"
	}
	fmt.Fprintln(p.w, line)
}
// startDepCycle marks the start of a value-dependency cycle in the output.
func (p stringFuncPrinter) startDepCycle() {
	fmt.Fprintln(p.w, "dependency cycle!")
}

// endDepCycle marks the end of a dependency cycle (no-op for text output).
func (p stringFuncPrinter) endDepCycle() {}

// named prints the list of values associated with a named local slot.
func (p stringFuncPrinter) named(n LocalSlot, vals []*Value) {
	fmt.Fprintf(p.w, "name %s: %v\n", n.Name(), vals)
}
// fprintFunc prints f via p. Values in a block are emitted in schedule
// order if one has been decided; otherwise phis come first, then the
// remaining values in dependency order, with any leftover values (which
// must form a dependency cycle) flushed between cycle markers.
func fprintFunc(p funcPrinter, f *Func) {
	reachable, live := findlive(f)
	p.header(f)
	printed := make([]bool, f.NumValues())
	for _, b := range f.Blocks {
		p.startBlock(b, reachable[b.ID])

		if f.scheduled {
			// Order of Values has been decided - print in that order.
			for _, v := range b.Values {
				p.value(v, live[v.ID])
				printed[v.ID] = true
			}
			p.endBlock(b)
			continue
		}

		// print phis first since all value cycles contain a phi
		n := 0
		for _, v := range b.Values {
			if v.Op != OpPhi {
				continue
			}
			p.value(v, live[v.ID])
			printed[v.ID] = true
			n++
		}

		// print rest of values in dependency order
		for n < len(b.Values) {
			m := n
		outer:
			for _, v := range b.Values {
				if printed[v.ID] {
					continue
				}
				for _, w := range v.Args {
					// w == nil shouldn't happen, but if it does,
					// don't panic; we'll get a better diagnosis later.
					if w != nil && w.Block == b && !printed[w.ID] {
						continue outer
					}
				}
				p.value(v, live[v.ID])
				printed[v.ID] = true
				n++
			}
			if m == n {
				// No progress this round: the unprinted values form a
				// dependency cycle. Print them all, flagged as such.
				p.startDepCycle()
				for _, v := range b.Values {
					if printed[v.ID] {
						continue
					}
					p.value(v, live[v.ID])
					printed[v.ID] = true
					n++
				}
				p.endDepCycle()
			}
		}

		p.endBlock(b)
	}
	for name, vals := range f.NamedValues {
		p.named(name, vals)
	}
}

View File

@ -0,0 +1,351 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// rangeMask represents the possible relations between a pair of variables
// as a bit set of lt/eq/gt.
type rangeMask uint

const (
	lt rangeMask = 1 << iota
	eq
	gt
)
// typeMask represents the universe of a variable pair in which
// a set of relations is known.
// For example, information learned for unsigned pairs cannot
// be transferred to signed pairs because the same bit representation
// can mean something else.
type typeMask uint

const (
	signed typeMask = 1 << iota
	unsigned
	pointer
)
// typeRange pairs a relation set with the universe(s)
// (signed/unsigned/pointer) in which it was learned.
type typeRange struct {
	t typeMask
	r rangeMask
}
// control is the key of the restriction map: a universe plus the pair of
// value IDs being related. Invariant maintained by callers: a0 <= a1.
type control struct {
	tm     typeMask
	a0, a1 ID
}
var (
	// reverseBits maps a 3-bit lt|eq|gt mask to the mask that holds when
	// the two operands are swapped (lt and gt bits exchanged).
	reverseBits = [...]rangeMask{0, 4, 2, 6, 1, 5, 3, 7}

	// typeRangeTable maps what we learn when the positive branch is taken.
	// For example:
	//   OpLess8: {signed, lt},
	//   v1 = (OpLess8 v2 v3).
	// If the v1 branch is taken then we learn that the rangeMask
	// can be at most lt.
	typeRangeTable = map[Op]typeRange{
		OpEq8:   {signed | unsigned, eq},
		OpEq16:  {signed | unsigned, eq},
		OpEq32:  {signed | unsigned, eq},
		OpEq64:  {signed | unsigned, eq},
		OpEqPtr: {pointer, eq},

		OpNeq8:   {signed | unsigned, lt | gt},
		OpNeq16:  {signed | unsigned, lt | gt},
		OpNeq32:  {signed | unsigned, lt | gt},
		OpNeq64:  {signed | unsigned, lt | gt},
		OpNeqPtr: {pointer, lt | gt},

		OpLess8:   {signed, lt},
		OpLess8U:  {unsigned, lt},
		OpLess16:  {signed, lt},
		OpLess16U: {unsigned, lt},
		OpLess32:  {signed, lt},
		OpLess32U: {unsigned, lt},
		OpLess64:  {signed, lt},
		OpLess64U: {unsigned, lt},

		OpLeq8:   {signed, lt | eq},
		OpLeq8U:  {unsigned, lt | eq},
		OpLeq16:  {signed, lt | eq},
		OpLeq16U: {unsigned, lt | eq},
		OpLeq32:  {signed, lt | eq},
		OpLeq32U: {unsigned, lt | eq},
		OpLeq64:  {signed, lt | eq},
		OpLeq64U: {unsigned, lt | eq},

		OpGeq8:   {signed, eq | gt},
		OpGeq8U:  {unsigned, eq | gt},
		OpGeq16:  {signed, eq | gt},
		OpGeq16U: {unsigned, eq | gt},
		OpGeq32:  {signed, eq | gt},
		OpGeq32U: {unsigned, eq | gt},
		OpGeq64:  {signed, eq | gt},
		OpGeq64U: {unsigned, eq | gt},

		OpGreater8:   {signed, gt},
		OpGreater8U:  {unsigned, gt},
		OpGreater16:  {signed, gt},
		OpGreater16U: {unsigned, gt},
		OpGreater32:  {signed, gt},
		OpGreater32U: {unsigned, gt},
		OpGreater64:  {signed, gt},
		OpGreater64U: {unsigned, gt},

		// TODO: OpIsInBounds actually test 0 <= a < b. This means
		// that the positive branch learns signed/LT and unsigned/LT
		// but the negative branch only learns unsigned/GE.
		OpIsInBounds:      {unsigned, lt},
		OpIsSliceInBounds: {unsigned, lt | eq},
	}
)
// prove removes redundant BlockIf controls that can be inferred in a straight line.
//
// By far, the most common redundant control are generated by bounds checking.
// For example for the code:
//
//	a[i] = 4
//	foo(a[i])
//
// The compiler will generate the following code:
//
//	if i >= len(a) {
//		panic("not in bounds")
//	}
//	a[i] = 4
//	if i >= len(a) {
//		panic("not in bounds")
//	}
//	foo(a[i])
//
// The second comparison i >= len(a) is clearly redundant because if the
// else branch of the first comparison is executed, we already know that i < len(a).
// The code for the second panic can be removed.
func prove(f *Func) {
	idom := dominators(f)
	sdom := newSparseTree(f, idom)

	// current node state
	type walkState int
	const (
		descend walkState = iota
		simplify
	)
	// work maintains the DFS stack.
	type bp struct {
		block *Block      // current handled block
		state walkState   // what's to do
		saved []typeRange // save previous map entries modified by node
	}
	work := make([]bp, 0, 256)
	work = append(work, bp{
		block: f.Entry,
		state: descend,
	})

	// mask keeps track of restrictions for each pair of values in
	// the dominators for the current node.
	// Invariant: a0.ID <= a1.ID
	// For example {unsigned, a0, a1} -> eq|gt means that from
	// predecessors we know that a0 must be greater or equal to
	// a1.
	mask := make(map[control]rangeMask)

	// DFS on the dominator tree.
	for len(work) > 0 {
		node := work[len(work)-1]
		work = work[:len(work)-1]

		switch node.state {
		case descend:
			// Learn what branching into this block tells us, remember
			// what that overwrote, and schedule the simplify visit to
			// run after all dominated children have been processed.
			parent := idom[node.block.ID]
			tr := getRestrict(sdom, parent, node.block)
			saved := updateRestrictions(mask, parent, tr)

			work = append(work, bp{
				block: node.block,
				state: simplify,
				saved: saved,
			})

			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
				work = append(work, bp{
					block: s,
					state: descend,
				})
			}

		case simplify:
			simplifyBlock(mask, node.block)
			restoreRestrictions(mask, idom[node.block.ID], node.saved)
		}
	}
}
// getRestrict returns the range restrictions added by p
// when reaching b. p is the immediate dominator of b.
func getRestrict(sdom sparseTree, p *Block, b *Block) typeRange {
	if p == nil || p.Kind != BlockIf {
		return typeRange{}
	}
	tr, has := typeRangeTable[p.Control.Op]
	if !has {
		return typeRange{}
	}
	// If p and p.Succs[0] are dominators it means that every path
	// from entry to b passes through p and p.Succs[0]. We care that
	// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
	// has one predecessor then (apart from the degenerate case),
	// there is no path from entry that can reach b through p.Succs[1].
	// TODO: how about p->yes->b->yes, i.e. a loop in yes.
	if sdom.isAncestorEq(p.Succs[0], b) && len(p.Succs[0].Preds) == 1 {
		return tr
	} else if sdom.isAncestorEq(p.Succs[1], b) && len(p.Succs[1].Preds) == 1 {
		// Reached via the negative branch: the complementary set holds.
		tr.r = (lt | eq | gt) ^ tr.r
		return tr
	}
	return typeRange{}
}
// updateRestrictions updates restrictions from the previous block (p) based on tr.
// normally tr was calculated with getRestrict. It returns the map entries it
// replaced, so restoreRestrictions can undo the update when backtracking.
func updateRestrictions(mask map[control]rangeMask, p *Block, tr typeRange) []typeRange {
	if tr.t == 0 {
		return nil
	}

	// p modifies the restrictions for (a0, a1).
	// save and return the previous state.
	a0 := p.Control.Args[0]
	a1 := p.Control.Args[1]
	if a0.ID > a1.ID {
		tr.r = reverseBits[tr.r]
		a0, a1 = a1, a0
	}

	saved := make([]typeRange, 0, 2)
	for t := typeMask(1); t <= tr.t; t <<= 1 {
		if t&tr.t == 0 {
			continue
		}
		i := control{t, a0.ID, a1.ID}
		oldRange, ok := mask[i]
		if !ok {
			if a1 != a0 {
				oldRange = lt | eq | gt
			} else { // sometimes happens after cse
				oldRange = eq
			}
		}
		// if i was not already in the map we save the full range
		// so that when we restore it we properly keep track of it.
		saved = append(saved, typeRange{t, oldRange})
		// mask[i] contains the possible relations between a0 and a1.
		// When we branched from parent we learned that the possible
		// relations cannot be more than tr.r. We compute the new set of
		// relations as the intersection between the old and the new set.
		mask[i] = oldRange & tr.r
	}
	return saved
}
// restoreRestrictions undoes updateRestrictions for p: it reinstates the
// saved relation sets for p's control pair, deleting entries whose saved
// value was the full (unrestricted) range, meaning they were absent before.
func restoreRestrictions(mask map[control]rangeMask, p *Block, saved []typeRange) {
	if p == nil || p.Kind != BlockIf || len(saved) == 0 {
		return
	}
	a0 := p.Control.Args[0].ID
	a1 := p.Control.Args[1].ID
	if a0 > a1 {
		a0, a1 = a1, a0
	}
	for _, tr := range saved {
		i := control{tr.t, a0, a1}
		if tr.r != lt|eq|gt {
			mask[i] = tr.r
		} else {
			delete(mask, i)
		}
	}
}
// simplifyBlock simplifies block b given the restrictions in mask: if the
// accumulated relations prove b's control always true (or always false),
// b is converted to a BlockFirst with the proven successor placed first.
func simplifyBlock(mask map[control]rangeMask, b *Block) {
	if b.Kind != BlockIf {
		return
	}

	tr, has := typeRangeTable[b.Control.Op]
	if !has {
		return
	}

	succ := -1
	a0 := b.Control.Args[0].ID
	a1 := b.Control.Args[1].ID
	if a0 > a1 {
		tr.r = reverseBits[tr.r]
		a0, a1 = a1, a0
	}

	for t := typeMask(1); t <= tr.t; t <<= 1 {
		if t&tr.t == 0 {
			continue
		}

		// tr.r represents in which case the positive branch is taken.
		// m represents which cases are possible because of previous relations.
		// If the set of possible relations m is included in the set of relations
		// needed to take the positive branch (or negative) then that branch will
		// always be taken.
		// For shortcut, if m == 0 then this block is dead code.
		i := control{t, a0, a1}
		m := mask[i]
		if m != 0 && tr.r&m == m {
			if b.Func.pass.debug > 0 {
				b.Func.Config.Warnl(int(b.Line), "Proved %s", b.Control.Op)
			}
			b.Logf("proved positive branch of %s, block %s in %s\n", b.Control, b, b.Func.Name)
			succ = 0
			break
		}
		if m != 0 && ((lt|eq|gt)^tr.r)&m == m {
			if b.Func.pass.debug > 0 {
				b.Func.Config.Warnl(int(b.Line), "Disproved %s", b.Control.Op)
			}
			b.Logf("proved negative branch of %s, block %s in %s\n", b.Control, b, b.Func.Name)
			succ = 1
			break
		}
	}

	if succ == -1 {
		// HACK: If the first argument of IsInBounds or IsSliceInBounds
		// is a constant and we already know that constant is smaller (or equal)
		// to the upper bound then this is proven. Most useful in cases such as:
		// if len(a) <= 1 { return }
		// do something with a[1]
		c := b.Control
		if (c.Op == OpIsInBounds || c.Op == OpIsSliceInBounds) &&
			c.Args[0].Op == OpConst64 && c.Args[0].AuxInt >= 0 {
			m := mask[control{signed, a0, a1}]
			if m != 0 && tr.r&m == m {
				if b.Func.pass.debug > 0 {
					b.Func.Config.Warnl(int(b.Line), "Proved constant %s", c.Op)
				}
				succ = 0
			}
		}
	}

	if succ != -1 {
		b.Kind = BlockFirst
		b.Control = nil
		b.Succs[0], b.Succs[1] = b.Succs[succ], b.Succs[1-succ]
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,33 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "testing"
// TestLiveControlOps builds a function in which two distinct flag-producing
// values (TESTB results) are each used as the control of a block, runs
// flagalloc and regalloc over it, and verifies the result is well-formed.
func TestLiveControlOps(t *testing.T) {
	c := testConfig(t)
	f := Fun(c, "entry",
		Bloc("entry",
			Valu("mem", OpInitMem, TypeMem, 0, nil),
			Valu("x", OpAMD64MOVBconst, TypeInt8, 1, nil),
			Valu("y", OpAMD64MOVBconst, TypeInt8, 2, nil),
			Valu("a", OpAMD64TESTB, TypeFlags, 0, nil, "x", "y"),
			Valu("b", OpAMD64TESTB, TypeFlags, 0, nil, "y", "x"),
			Eq("a", "if", "exit"),
		),
		Bloc("if",
			Eq("b", "plain", "exit"),
		),
		Bloc("plain",
			Goto("exit"),
		),
		Bloc("exit",
			Exit("mem"),
		),
	)
	flagalloc(f.f)
	regalloc(f.f)
	checkFunc(f.f)
}

Some files were not shown because too many files have changed in this diff Show More