diff --git a/vendor/github.com/crillab/gophersat/bf/bf.go b/vendor/github.com/crillab/gophersat/bf/bf.go index b80b61c1..5d9552f0 100644 --- a/vendor/github.com/crillab/gophersat/bf/bf.go +++ b/vendor/github.com/crillab/gophersat/bf/bf.go @@ -72,8 +72,8 @@ type trueConst struct{} // True is the constant denoting a tautology. var True Formula = trueConst{} -func (t trueConst) nnf() Formula { return t } -func (t trueConst) String() string { return "⊤" } +func (t trueConst) nnf() Formula { return t } +func (t trueConst) String() string { return "⊤" } func (t trueConst) Eval(model map[string]bool) bool { return true } // The "false" constant. @@ -82,8 +82,8 @@ type falseConst struct{} // False is the constant denoting a contradiction. var False Formula = falseConst{} -func (f falseConst) nnf() Formula { return f } -func (f falseConst) String() string { return "⊥" } +func (f falseConst) nnf() Formula { return f } +func (f falseConst) String() string { return "⊥" } func (f falseConst) Eval(model map[string]bool) bool { return false } // Var generates a named boolean variable in a formula. diff --git a/vendor/github.com/crillab/gophersat/solver/card.go b/vendor/github.com/crillab/gophersat/solver/card.go index bedc5e5c..10fb9a92 100644 --- a/vendor/github.com/crillab/gophersat/solver/card.go +++ b/vendor/github.com/crillab/gophersat/solver/card.go @@ -15,10 +15,11 @@ func AtLeast1(lits ...int) CardConstr { // AtMost1 returns a cardinality constraint stating that at most one of the given lits can be true. func AtMost1(lits ...int) CardConstr { + negated := make([]int, len(lits)) for i, lit := range lits { - lits[i] = -lit + negated[i] = -lit } - return CardConstr{Lits: lits, AtLeast: len(lits) - 1} + return CardConstr{Lits: negated, AtLeast: len(lits) - 1} } // Exactly1 returns two cardinality constraints stating that exactly one of the given lits must be true. diff --git a/vendor/github.com/crillab/gophersat/solver/parser.go b/vendor/github.com/crillab/gophersat/solver/parser.go index d2bd8dfb..050eaa66 100644 --- a/vendor/github.com/crillab/gophersat/solver/parser.go +++ b/vendor/github.com/crillab/gophersat/solver/parser.go @@ -55,17 +55,21 @@ func ParseSlice(cnf [][]int) *Problem { return &pb } } - pb.simplify() + pb.simplify2() return &pb } +func isSpace(b byte) bool { + return b == ' ' || b == '\t' || b == '\n' || b == '\r' +} + // readInt reads an int from r. // 'b' is the last read byte. It can be a space, a '-' or a digit. // The int can be negated. // All spaces before the int value are ignored. // Can return EOF. 
func readInt(b *byte, r *bufio.Reader) (res int, err error) { - for err == nil && (*b == ' ' || *b == '\t' || *b == '\n' || *b == '\r') { + for err == nil && isSpace(*b) { *b, err = r.ReadByte() } if err == io.EOF { @@ -88,7 +92,7 @@ func readInt(b *byte, r *bufio.Reader) (res int, err error) { } res = 10*res + int(*b-'0') *b, err = r.ReadByte() - if *b == ' ' || *b == '\t' || *b == '\n' || *b == '\r' { + if isSpace(*b) { break } } diff --git a/vendor/github.com/crillab/gophersat/solver/parser_pb.go b/vendor/github.com/crillab/gophersat/solver/parser_pb.go index 5e3f0f94..45809da1 100644 --- a/vendor/github.com/crillab/gophersat/solver/parser_pb.go +++ b/vendor/github.com/crillab/gophersat/solver/parser_pb.go @@ -65,6 +65,14 @@ func ParseCardConstrs(constrs []CardConstr) *Problem { return &pb } +func (pb *Problem) appendClause(constr PBConstr) { + lits := make([]Lit, len(constr.Lits)) + for j, val := range constr.Lits { + lits[j] = IntToLit(int32(val)) + } + pb.Clauses = append(pb.Clauses, NewPBClause(lits, constr.Weights, constr.AtLeast)) +} + // ParsePBConstrs parses and returns a PB problem from PBConstr values. func ParsePBConstrs(constrs []PBConstr) *Problem { var pb Problem @@ -100,11 +108,7 @@ func ParsePBConstrs(constrs []PBConstr) *Problem { } } } else { - lits := make([]Lit, len(constr.Lits)) - for j, val := range constr.Lits { - lits[j] = IntToLit(int32(val)) - } - pb.Clauses = append(pb.Clauses, NewPBClause(lits, constr.Weights, card)) + pb.appendClause(constr) } } pb.Model = make([]decLevel, pb.NbVars) diff --git a/vendor/github.com/crillab/gophersat/solver/pb.go b/vendor/github.com/crillab/gophersat/solver/pb.go index 0c1841f4..636415ca 100644 --- a/vendor/github.com/crillab/gophersat/solver/pb.go +++ b/vendor/github.com/crillab/gophersat/solver/pb.go @@ -57,12 +57,17 @@ func GtEq(lits []int, weights []int, n int) PBConstr { if len(weights) != 0 && len(lits) != len(weights) { panic("not as many lits as weights") } - for i := range weights { + for i := 0; i < len(weights); i++ { if weights[i] < 0 { weights[i] = -weights[i] n += weights[i] lits[i] = -lits[i] } + if weights[i] == 0 { + weights = append(weights[:i], weights[i+1:]...) + lits = append(lits[:i], lits[i+1:]...) 
+ i-- + } } return PBConstr{Lits: lits, Weights: weights, AtLeast: n} } diff --git a/vendor/github.com/crillab/gophersat/solver/solver.go b/vendor/github.com/crillab/gophersat/solver/solver.go index c6ef3a37..ec014412 100644 --- a/vendor/github.com/crillab/gophersat/solver/solver.go +++ b/vendor/github.com/crillab/gophersat/solver/solver.go @@ -87,10 +87,16 @@ func New(problem *Problem) *Solver { return &Solver{status: Unsat} } nbVars := problem.NbVars + + trailCap := nbVars + if len(problem.Units) > trailCap { + trailCap = len(problem.Units) + } + s := &Solver{ nbVars: nbVars, status: problem.Status, - trail: make([]Lit, len(problem.Units), nbVars), + trail: make([]Lit, len(problem.Units), trailCap), model: problem.Model, activity: make([]float64, nbVars), polarity: make([]bool, nbVars), @@ -343,7 +349,7 @@ func (s *Solver) propagateAndSearch(lit Lit, lvl decLevel) Status { return Indet } if s.Stats.NbConflicts >= s.wl.idxReduce*s.wl.nbMax { - s.wl.idxReduce = (s.Stats.NbConflicts / s.wl.nbMax) + 1 + s.wl.idxReduce = s.Stats.NbConflicts/s.wl.nbMax + 1 s.reduceLearned() s.bumpNbMax() } @@ -738,7 +744,7 @@ func (s *Solver) Optimal(results chan Result, stop chan struct{}) (res Result) { copy(s.lastModel, s.model) // Save this model: it might be the last one cost = 0 for i, lit := range s.minLits { - if (s.model[lit.Var()] > 0) == lit.IsPositive() { + if s.model[lit.Var()] > 0 == lit.IsPositive() { if s.minWeights == nil { cost++ } else { @@ -803,7 +809,7 @@ func (s *Solver) Minimize() int { copy(s.lastModel, s.model) // Save this model: it might be the last one cost = 0 for i, lit := range s.minLits { - if (s.model[lit.Var()] > 0) == lit.IsPositive() { + if s.model[lit.Var()] > 0 == lit.IsPositive() { if s.minWeights == nil { cost++ } else { diff --git a/vendor/github.com/ecooper/qlearning/.gitignore b/vendor/github.com/ecooper/qlearning/.gitignore new file mode 100644 index 00000000..daf913b1 --- /dev/null +++ b/vendor/github.com/ecooper/qlearning/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/github.com/ecooper/qlearning/LICENSE b/vendor/github.com/ecooper/qlearning/LICENSE new file mode 100644 index 00000000..3eef5b3f --- /dev/null +++ b/vendor/github.com/ecooper/qlearning/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Eric Cooper + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/vendor/github.com/ecooper/qlearning/README.md b/vendor/github.com/ecooper/qlearning/README.md
new file mode 100644
index 00000000..4bcd1414
--- /dev/null
+++ b/vendor/github.com/ecooper/qlearning/README.md
@@ -0,0 +1,72 @@
+# qlearning
+
+The qlearning package provides a series of interfaces and utilities to implement
+the [Q-Learning](https://en.wikipedia.org/wiki/Q-learning) algorithm in
+Go.
+
+This project was largely inspired by [flappybird-qlearning-
+bot](https://github.com/chncyhn/flappybird-qlearning-bot).
+
+*Until a release is tagged, qlearning should be considered highly
+experimental and mostly a fun toy.*
+
+## Installation
+
+```shell
+$ go get github.com/ecooper/qlearning
+```
+
+## Quickstart
+
+qlearning provides example implementations in the [examples](examples/)
+directory of the project.
+
+[hangman.go](examples/hangman.go) provides a naive implementation of
+[Hangman](https://en.wikipedia.org/wiki/Hangman_(game)) for use with
+qlearning.
+
+```shell
+$ cd $GOPATH/src/github.com/ecooper/qlearning/examples
+$ go run hangman.go -h
+Usage of hangman:
+  -debug
+        Set debug
+  -games int
+        Play N games (default 5000000)
+  -progress int
+        Print progress messages every N games (default 1000)
+  -wordlist string
+        Path to a wordlist (default "./wordlist.txt")
+  -words int
+        Use N words from wordlist (default 10000)
+```
+
+By default, running [hangman.go](examples/hangman.go) will play millions
+of games against a 10,000-word corpus. That's a bit overkill for just
+trying out qlearning. You can run it against a smaller number of words
+for fewer games using the `-games` and `-words` flags.
+
+```shell
+$ go run hangman.go -words 100 -progress 1000 -games 5000
+100 words loaded
+1000 games played: 92 WINS 908 LOSSES 9% WIN RATE
+2000 games played: 447 WINS 1553 LOSSES 36% WIN RATE
+3000 games played: 1064 WINS 1936 LOSSES 62% WIN RATE
+4000 games played: 1913 WINS 2087 LOSSES 85% WIN RATE
+5000 games played: 2845 WINS 2155 LOSSES 93% WIN RATE
+
+Agent performance: 5000 games played, 2845 WINS 2155 LOSSES 57% WIN RATE
+```
+
+The "WIN RATE" in each progress report is computed only over that cycle,
+a group of 1000 games in this example. It is meant to show how quickly
+the agent is learning. If the agent is in fact learning, the win rate
+should keep increasing until it converges.
+
+As you can see, after 5000 games, the agent is able to "learn" and play
+hangman against a 100-word vocabulary.
+
+## Usage
+
+See [godocs](https://godoc.org/github.com/ecooper/qlearning) for the
+package documentation.
diff --git a/vendor/github.com/ecooper/qlearning/qlearning.go b/vendor/github.com/ecooper/qlearning/qlearning.go
new file mode 100644
index 00000000..5a36a105
--- /dev/null
+++ b/vendor/github.com/ecooper/qlearning/qlearning.go
@@ -0,0 +1,167 @@
+// Package qlearning is an experimental set of interfaces and helpers to
+// implement the Q-learning algorithm in Go.
+//
+// This is highly experimental and should be considered a toy.
+//
+// See https://github.com/ecooper/qlearning/tree/master/examples for
+// implementation examples.
+package qlearning
+
+import (
+	"fmt"
+	"math/rand"
+	"time"
+)
+
+// State is an interface wrapping the current state of the model.
+type State interface {
+
+	// String returns a string representation of the given state.
+	// Implementers should take care to ensure that this is a consistent
+	// hash for a given state.
+	String() string
+
+	// Next provides a slice of possible Actions that could be applied to
+	// a state.
+	Next() []Action
+}
+
+// Action is an interface wrapping an action that can be applied to the
+// model's current state.
+//
+// BUG (ecooper): A state should apply an action, not the other way
+// around.
+type Action interface {
+	String() string
+	Apply(State) State
+}
+
+// Rewarder is an interface wrapping the ability to provide a reward
+// for the execution of an action in a given state.
+type Rewarder interface {
+	// Reward calculates the reward value for a given action in a given
+	// state.
+	Reward(action *StateAction) float32
+}
+
+// Agent is an interface for a model's agent and is able to learn
+// from actions and return the current Q-value of an action at a given state.
+type Agent interface {
+	// Learn updates the model for a given state and action, using the
+	// provided Rewarder implementation.
+	Learn(*StateAction, Rewarder)
+
+	// Value returns the current Q-value for a State and Action.
+	Value(State, Action) float32
+
+	// Return a string representation of the Agent.
+	String() string
+}
+
+// StateAction is a struct grouping an action to a given State. Additionally,
+// a Value can be associated with a StateAction, which is typically the Q-value.
+type StateAction struct {
+	State  State
+	Action Action
+	Value  float32
+}
+
+// NewStateAction creates a new StateAction for a State and Action.
+func NewStateAction(state State, action Action, val float32) *StateAction {
+	return &StateAction{
+		State:  state,
+		Action: action,
+		Value:  val,
+	}
+}
+
+// Next uses an Agent and State to find the highest-scored Action.
+//
+// In the case of Q-value ties for a set of actions, one of the
+// tied actions is selected at random.
+func Next(agent Agent, state State) *StateAction {
+	best := make([]*StateAction, 0)
+	bestVal := float32(0.0)
+
+	for _, action := range state.Next() {
+		val := agent.Value(state, action)
+
+		// Track every action tied for the highest Q-value seen so far,
+		// then pick one of them at random below. Checking len(best) == 0,
+		// rather than comparing bestVal against zero, keeps the selection
+		// correct when all Q-values are zero or when the best value is
+		// negative.
+		if len(best) == 0 || val > bestVal {
+			best = []*StateAction{NewStateAction(state, action, val)}
+			bestVal = val
+		} else if val == bestVal {
+			best = append(best, NewStateAction(state, action, val))
+		}
+	}
+
+	return best[rand.Intn(len(best))]
+}
+
+// SimpleAgent is an Agent implementation that stores Q-values in a
+// map of maps.
+type SimpleAgent struct {
+	q  map[string]map[string]float32
+	lr float32
+	d  float32
+}
+
+// NewSimpleAgent creates a SimpleAgent with the provided learning rate
+// and discount factor.
+func NewSimpleAgent(lr, d float32) *SimpleAgent {
+	return &SimpleAgent{
+		q:  make(map[string]map[string]float32),
+		d:  d,
+		lr: lr,
+	}
+}
+
+// getActions returns the current Q-values for a given state.
+func (agent *SimpleAgent) getActions(state string) map[string]float32 {
+	if _, ok := agent.q[state]; !ok {
+		agent.q[state] = make(map[string]float32)
+	}
+
+	return agent.q[state]
+}
+
+// Learn updates the existing Q-value for the given State and Action
+// using the Rewarder.
+// +// See https://en.wikipedia.org/wiki/Q-learning#Algorithm +func (agent *SimpleAgent) Learn(action *StateAction, reward Rewarder) { + current := action.State.String() + next := action.Action.Apply(action.State).String() + + actions := agent.getActions(current) + + maxNextVal := float32(0.0) + for _, v := range agent.getActions(next) { + if v > maxNextVal { + maxNextVal = v + } + } + + currentVal := actions[action.Action.String()] + actions[action.Action.String()] = currentVal + agent.lr*(reward.Reward(action)+agent.d*maxNextVal-currentVal) +} + +// Value gets the current Q-value for a State and Action. +func (agent *SimpleAgent) Value(state State, action Action) float32 { + return agent.getActions(state.String())[action.String()] +} + +// String returns the current Q-value map as a printed string. +// +// BUG (ecooper): This is useless. +func (agent *SimpleAgent) String() string { + return fmt.Sprintf("%v", agent.q) +} + +func init() { + rand.Seed(time.Now().UTC().UnixNano()) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 5957048a..b4d2d0f2 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -49,7 +49,7 @@ github.com/containerd/continuity/pathdriver github.com/containerd/continuity/syscallx # github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d github.com/cpuguy83/go-md2man/v2/md2man -# github.com/crillab/gophersat v1.1.7 +# github.com/crillab/gophersat v1.1.9-0.20200211102949-9a8bf7f2f0a3 github.com/crillab/gophersat/bf github.com/crillab/gophersat/solver # github.com/cyphar/filepath-securejoin v0.2.2 @@ -93,6 +93,8 @@ github.com/docker/go-units github.com/docker/libnetwork/ipamutils # github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 github.com/docker/libtrust +# github.com/ecooper/qlearning v0.0.0-20160612200101-3075011a69fd +github.com/ecooper/qlearning # github.com/fatih/color v1.7.0 github.com/fatih/color # github.com/fsnotify/fsnotify v1.4.7 @@ -263,8 +265,8 @@ go.uber.org/multierr go.uber.org/tools/update-license # go.uber.org/zap v1.13.0 go.uber.org/zap -go.uber.org/zap/internal/bufferpool go.uber.org/zap/zapcore +go.uber.org/zap/internal/bufferpool go.uber.org/zap/buffer go.uber.org/zap/internal/color go.uber.org/zap/internal/exit
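
A note on the `AtMost1` change in `solver/card.go` above: "at most one of n literals is true" is encoded as "at least n-1 of the negated literals are true", and the patch stops mutating the caller's slice. A variadic call like `AtMost1(lits...)` passes the slice itself, so the old in-place negation corrupted the caller's literals. A minimal sketch of the fixed behavior; `CardConstr` is re-declared here only to make the snippet self-contained:

```go
package main

import "fmt"

// CardConstr mirrors the solver's cardinality-constraint type from the diff.
type CardConstr struct {
	Lits    []int
	AtLeast int
}

// AtMost1 is the patched version: it copies the literals before negating
// them, so the caller's slice is left intact.
func AtMost1(lits ...int) CardConstr {
	negated := make([]int, len(lits))
	for i, lit := range lits {
		negated[i] = -lit
	}
	// "at most 1 of n literals is true" == "at least n-1 of them are false".
	return CardConstr{Lits: negated, AtLeast: len(lits) - 1}
}

func main() {
	lits := []int{1, 2, 3}
	c := AtMost1(lits...)          // the variadic call passes the slice itself, no copy
	fmt.Println(c.Lits, c.AtLeast) // [-1 -2 -3] 2
	fmt.Println(lits)              // [1 2 3]: unchanged; the old code negated these in place
}
```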
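The `GtEq` change in `solver/pb.go` normalizes pseudo-boolean constraints: a negative weight w on a literal x is rewritten as |w| on the negated literal with the threshold raised by |w|, and zero-weight terms are dropped along with their literals. The rewrite is sound because w*x = |w|*(1-x) - |w| when x is 0 or 1. A small sanity check of that identity, with made-up numbers:

```go
package main

import "fmt"

func main() {
	// The identity behind the normalization in GtEq, for x in {0,1}:
	//   w*x >= n  (w < 0)   <=>   |w|*(1-x) >= n + |w|
	// because w*x = |w|*(1-x) - |w| when w is negative.
	w, n := -3, -2
	for x := 0; x <= 1; x++ {
		original := w*x >= n
		rewritten := -w*(1-x) >= n+(-w)
		fmt.Printf("x=%d: original=%v rewritten=%v\n", x, original, rewritten)
	}
	// Both agree: true for x=0, false for x=1.
}
```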
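The `trailCap` guard added to `solver.New` works around a Go slice rule: `make([]Lit, length, capacity)` panics at run time whenever the length exceeds the capacity, which previously happened for problems with more unit clauses than variables. A minimal reproduction of the underlying behavior, with illustrative numbers:

```go
package main

import "fmt"

func main() {
	units, vars := 4, 2 // a problem with more unit clauses than variables
	defer func() { fmt.Println("recovered:", recover()) }()
	_ = make([]int, units, vars) // panics: length exceeds capacity
}
```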
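Finally, `SimpleAgent.Learn` in the new `qlearning.go` implements the standard Q-learning update, Q(s,a) <- Q(s,a) + lr*(r + d*max Q(s',a') - Q(s,a)). A worked numeric example mirroring the body of `Learn`, with all values made up for illustration:

```go
package main

import "fmt"

func main() {
	// One step of the update performed by SimpleAgent.Learn.
	var (
		lr         float32 = 0.5 // learning rate (agent.lr)
		d          float32 = 0.9 // discount factor (agent.d)
		reward     float32 = 1.0 // Rewarder.Reward(action)
		currentVal float32 = 0.2 // Q(s, a) before the update
		maxNextVal float32 = 0.6 // max over a' of Q(s', a')
	)
	// Q(s,a) <- Q(s,a) + lr*(r + d*max_a' Q(s',a') - Q(s,a))
	updated := currentVal + lr*(reward+d*maxNextVal-currentVal)
	fmt.Println(updated) // 0.2 + 0.5*(1.0 + 0.54 - 0.2) = 0.87
}
```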