From f8d1dee965f76837e891ded19dd59ee264db8ddc Mon Sep 17 00:00:00 2001 From: Ian Cottrell Date: Fri, 10 May 2019 16:36:20 -0400 Subject: [PATCH] internal/memoize: a new library to memoize functions This library holds onto results with a weak reference, and guarantees that for as long as a result has not been garbage collected it will return the same result for the same key. Change-Id: I4a4528f31bf8bbf18809cbffe95dc93e05d769fe Reviewed-on: https://go-review.googlesource.com/c/tools/+/180845 Reviewed-by: Rebecca Stambler --- internal/memoize/memoize.go | 222 +++++++++++++++++++++++++++++++ internal/memoize/memoize_test.go | 176 ++++++++++++++++++++++++ internal/memoize/nocopy.go | 24 ++++ 3 files changed, 422 insertions(+) create mode 100644 internal/memoize/memoize.go create mode 100644 internal/memoize/memoize_test.go create mode 100644 internal/memoize/nocopy.go diff --git a/internal/memoize/memoize.go b/internal/memoize/memoize.go new file mode 100644 index 0000000000..457f9141c2 --- /dev/null +++ b/internal/memoize/memoize.go @@ -0,0 +1,222 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package memoize supports functions with idempotent results that are expensive +// to compute having their return value memorized and returned again the next +// time they are invoked. +// The return values are only remembered for as long as there is still a user +// to prevent excessive memory use. +// To use this package, build a store and use it to aquire handles with the +// Bind method. +package memoize + +import ( + "context" + "runtime" + "sync" + "unsafe" +) + +// Store binds keys to functions, returning handles that can be used to access +// the functions results. +type Store struct { + mu sync.Mutex + // entries is the set of values stored. + entries map[interface{}]*entry +} + +// Function is the type for functions that can be memoized. +// The result must be a pointer. +type Function func(ctx context.Context) interface{} + +// Handle is returned from a store when a key is bound to a function. +// It is then used to access the results of that function. +type Handle struct { + mu sync.Mutex + function Function + entry *entry + value interface{} +} + +// entry holds the machinery to manage a function and its result such that +// there is only one instance of the result live at any given time. +type entry struct { + noCopy + // mu contols access to the typ and ptr fields + mu sync.Mutex + // the calculated value, as stored in an interface{} + typ, ptr uintptr + ready bool + // wait is used to block until the value is ready + // will only be non nil if the generator is already running + wait chan struct{} +} + +// Has returns true if they key is currently valid for this store. +func (s *Store) Has(key interface{}) bool { + s.mu.Lock() + defer s.mu.Unlock() + _, found := s.entries[key] + return found +} + +// Delete removes a key from the store, if present. +func (s *Store) Delete(key interface{}) { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.entries, key) +} + +// Bind returns a handle for the given key and function. +// Each call to bind will generate a new handle, but all the handles for a +// single key will refer to the same value, and only the first handle to try to +// get the value will cause the function to be invoked. +// The results of the function are held for as long as there are handles through +// which the result has been accessed. +// Bind does not cause the value to be generated. +func (s *Store) Bind(key interface{}, function Function) *Handle { + // panic early if the function is nil + // it would panic later anyway, but in a way that was much harder to debug + if function == nil { + panic("Function passed to bind must not be nil") + } + // check if we already have the key + s.mu.Lock() + defer s.mu.Unlock() + e, found := s.entries[key] + if !found { + // we have not seen this key before, add a new entry + if s.entries == nil { + s.entries = make(map[interface{}]*entry) + } + e = &entry{} + s.entries[key] = e + } + return &Handle{ + entry: e, + function: function, + } +} + +// Cached returns the value associated with a key. +// It cannot cause the value to be generated, but will return the cached +// value if present. +func (s *Store) Cached(key interface{}) interface{} { + s.mu.Lock() + defer s.mu.Unlock() + e, found := s.entries[key] + if !found { + return nil + } + e.mu.Lock() + defer e.mu.Unlock() + return unref(e) +} + +// Cached returns the value associated with a handle. +// It will never cause the value to be generated, it will return the cached +// value if present. +func (h *Handle) Cached() interface{} { + h.mu.Lock() + defer h.mu.Unlock() + if h.value == nil { + h.entry.mu.Lock() + defer h.entry.mu.Unlock() + h.value = unref(h.entry) + } + return h.value +} + +// Get returns the value associated with a handle. +// If the value is not yet ready, the underlying function will be invoked. +// This makes this handle active, it will remember the value for as long as +// it exists, and cause any other handles for the same key to also return the +// same value. +func (h *Handle) Get(ctx context.Context) interface{} { + h.mu.Lock() + defer h.mu.Unlock() + if h.function != nil { + if v, ok := h.entry.get(ctx, h.function); ok { + h.value = v + h.function = nil + h.entry = nil + } + } + return h.value +} + +// get is the implementation of Get. +func (e *entry) get(ctx context.Context, f Function) (interface{}, bool) { + e.mu.Lock() + defer e.mu.Unlock() + // fast path if we already have a value + if e.ready { + return unref(e), true + } + // value is not ready, and we hold the lock + // see if the value is already being calculated + var value interface{} + if e.wait == nil { + e.wait = make(chan struct{}) + go func() { + defer func() { + close(e.wait) + e.wait = nil + }() + // e is not locked here + ctx := context.Background() + value = f(ctx) + // function is done, return to locked state so we can update the entry + e.mu.Lock() + defer e.mu.Unlock() + setref(e, value) + }() + } + // get a local copy of wait while we still hold the lock + wait := e.wait + e.mu.Unlock() + // release the lock while we wait + select { + case <-wait: + // we should now have a value + e.mu.Lock() + result := unref(e) + // the keep alive makes sure value is not garbage collected before unref + runtime.KeepAlive(value) + return result, true + case <-ctx.Done(): + // our context was cancelled + e.mu.Lock() + return nil, false + } +} + +// setref is called to store a value into an entry +// it must only be called when the lock is held +func setref(e *entry, value interface{}) interface{} { + // this is only called when the entry lock is already held + data := (*[2]uintptr)(unsafe.Pointer(&value)) + // store the value back to the entry as a weak reference + e.typ, e.ptr = data[0], data[1] + e.ready = true + if e.ptr != 0 { + // and arrange to clear the weak reference if the object is collected + runtime.SetFinalizer(value, func(_ interface{}) { + // clear the now invalid non pointer + e.mu.Lock() + defer e.mu.Unlock() + e.typ, e.ptr = 0, 0 + e.ready = false + }) + } + return value +} + +func unref(e *entry) interface{} { + // this is only called when the entry lock is already held + var v interface{} + data := (*[2]uintptr)(unsafe.Pointer(&v)) + data[0], data[1] = e.typ, e.ptr + return v +} diff --git a/internal/memoize/memoize_test.go b/internal/memoize/memoize_test.go new file mode 100644 index 0000000000..a1041c03fb --- /dev/null +++ b/internal/memoize/memoize_test.go @@ -0,0 +1,176 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package memoize_test + +import ( + "bytes" + "context" + "fmt" + "io" + "runtime" + "strings" + "testing" + "time" + + "golang.org/x/tools/internal/memoize" +) + +func TestStore(t *testing.T) { + pinned := []string{"b", "_1", "_3"} + unpinned := []string{"a", "c", "d", "_2", "_4"} + ctx := context.Background() + s := &memoize.Store{} + logBuffer := &bytes.Buffer{} + s.Bind("logger", func(context.Context) interface{} { return logBuffer }).Get(ctx) + verifyBuffer := func(name, expect string) { + got := logBuffer.String() + if got != expect { + t.Errorf("at %q expected:\n%v\ngot:\n%s", name, expect, got) + } + logBuffer.Reset() + } + verifyBuffer("nothing", ``) + s.Bind("_1", generate(s, "_1")).Get(ctx) + verifyBuffer("get 1", ` +start @1 +simple a = A +simple b = B +simple c = C +end @1 = A B C +`[1:]) + s.Bind("_1", generate(s, "_1")).Get(ctx) + verifyBuffer("redo 1", ``) + s.Bind("_2", generate(s, "_2")).Get(ctx) + verifyBuffer("get 2", ` +start @2 +simple d = D +simple e = E +simple f = F +end @2 = D E F +`[1:]) + s.Bind("_2", generate(s, "_2")).Get(ctx) + verifyBuffer("redo 2", ``) + s.Bind("_3", generate(s, "_3")).Get(ctx) + verifyBuffer("get 3", ` +start @3 +end @3 = @1[ A B C] @2[ D E F] +`[1:]) + s.Bind("_4", generate(s, "_4")).Get(ctx) + verifyBuffer("get 4", ` +start @3 +simple g = G +error ERR = fail +simple h = H +end @3 = G !fail H +`[1:]) + + var pins []*memoize.Handle + for _, key := range pinned { + h := s.Bind(key, generate(s, key)) + h.Get(ctx) + pins = append(pins, h) + } + + runAllFinalizers(t) + + for _, k := range pinned { + if v := s.Cached(k); v == nil { + t.Errorf("Pinned value %q was nil", k) + } + } + for _, k := range unpinned { + if v := s.Cached(k); v != nil { + t.Errorf("Unpinned value %q was %q", k, v) + } + } + runtime.KeepAlive(pins) +} + +func runAllFinalizers(t *testing.T) { + // the following is very tricky, be very careful changing it + // it relies on behavior of finalizers that is not guaranteed + // first run the GC to queue the finalizers + runtime.GC() + // wait is used to signal that the finalizers are all done + wait := make(chan struct{}) + // register a finalizer against an immediately collectible object + runtime.SetFinalizer(&struct{ s string }{"obj"}, func(_ interface{}) { close(wait) }) + // now run the GC again to pick up the tracker + runtime.GC() + // now wait for the finalizers to run + select { + case <-wait: + case <-time.Tick(time.Second): + t.Fatalf("Finalizers had not run after a second") + } +} + +type stringOrError struct { + memoize.NoCopy + value string + err error +} + +func (v *stringOrError) String() string { + if v.err != nil { + return v.err.Error() + } + return v.value +} + +func asValue(v interface{}) *stringOrError { + if v == nil { + return nil + } + return v.(*stringOrError) +} + +func generate(s *memoize.Store, key interface{}) memoize.Function { + return func(ctx context.Context) interface{} { + name := key.(string) + switch name { + case "err": + return logGenerator(ctx, s, "ERR", "", fmt.Errorf("fail")) + case "_1": + return joinValues(ctx, s, "@1", "a", "b", "c") + case "_2": + return joinValues(ctx, s, "@2", "d", "e", "f") + case "_3": + return joinValues(ctx, s, "@3", "_1", "_2") + case "_4": + return joinValues(ctx, s, "@3", "g", "err", "h") + default: + return logGenerator(ctx, s, name, strings.ToUpper(name), nil) + } + } +} + +func logGenerator(ctx context.Context, s *memoize.Store, name string, v string, err error) *stringOrError { + w := s.Cached("logger").(io.Writer) + if err != nil { + fmt.Fprintf(w, "error %v = %v\n", name, err) + } else { + fmt.Fprintf(w, "simple %v = %v\n", name, v) + } + return &stringOrError{value: v, err: err} +} + +func joinValues(ctx context.Context, s *memoize.Store, name string, keys ...string) *stringOrError { + w := s.Cached("logger").(io.Writer) + fmt.Fprintf(w, "start %v\n", name) + value := "" + for _, key := range keys { + v := asValue(s.Bind(key, generate(s, key)).Get(ctx)) + if v == nil { + value = value + " " + } else if v.err != nil { + value = value + " !" + v.err.Error() + } else { + value = value + " " + v.value + } + } + fmt.Fprintf(w, "end %v = %v\n", name, value) + return &stringOrError{value: fmt.Sprintf("%s[%v]", name, value)} +} diff --git a/internal/memoize/nocopy.go b/internal/memoize/nocopy.go new file mode 100644 index 0000000000..891322599b --- /dev/null +++ b/internal/memoize/nocopy.go @@ -0,0 +1,24 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package memoize + +// NoCopy is a type with no public methods that will trigger a vet check if it +// is ever copied. +// You can embed this in any type intended to be used as a value. This helps +// avoid accidentally holding a copy of a value instead of the value itself. +type NoCopy struct { + noCopy noCopy +} + +// noCopy may be embedded into structs which must not be copied +// after the first use. +// +// See https://golang.org/issues/8005#issuecomment-190753527 +// for details. +type noCopy struct{} + +// Lock is a no-op used by -copylocks checker from `go vet`. +func (*noCopy) Lock() {} +func (*noCopy) Unlock() {}