mirror of https://github.com/mudler/luet.git (synced 2025-09-02 07:45:02 +00:00)
update vendor/
vendor/github.com/crillab/gophersat/bf/bf.go | 8 (generated, vendored)
@@ -72,8 +72,8 @@ type trueConst struct{}
 // True is the constant denoting a tautology.
 var True Formula = trueConst{}
 
 func (t trueConst) nnf() Formula { return t }
 func (t trueConst) String() string { return "⊤" }
 func (t trueConst) Eval(model map[string]bool) bool { return true }
 
 // The "false" constant.
@@ -82,8 +82,8 @@ type falseConst struct{}
 // False is the constant denoting a contradiction.
 var False Formula = falseConst{}
 
 func (f falseConst) nnf() Formula { return f }
 func (f falseConst) String() string { return "⊥" }
 func (f falseConst) Eval(model map[string]bool) bool { return false }
 
 // Var generates a named boolean variable in a formula.
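These hunks touch the bf package's constant Formula values. For context, a minimal sketch of how a consumer such as luet drives this API, built from the package's exported Var/And/Or/Not constructors and Solve entry point (the sketch is not part of this commit, and assumes those entry points match this vendored revision):

```go
package main

import (
	"fmt"

	"github.com/crillab/gophersat/bf"
)

func main() {
	// (a OR b) AND NOT a is satisfiable only when b is true.
	f := bf.And(bf.Or(bf.Var("a"), bf.Var("b")), bf.Not(bf.Var("a")))
	if model := bf.Solve(f); model != nil {
		fmt.Println("SAT:", model) // e.g. map[a:false b:true]
	} else {
		fmt.Println("UNSAT") // would hold for bf.And(bf.Var("a"), bf.Not(bf.Var("a")))
	}
}
```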
vendor/github.com/crillab/gophersat/solver/card.go | 5 (generated, vendored)
@@ -15,10 +15,11 @@ func AtLeast1(lits ...int) CardConstr {
 
 // AtMost1 returns a cardinality constraint stating that at most one of the given lits can be true.
 func AtMost1(lits ...int) CardConstr {
+	negated := make([]int, len(lits))
 	for i, lit := range lits {
-		lits[i] = -lit
+		negated[i] = -lit
 	}
-	return CardConstr{Lits: lits, AtLeast: len(lits) - 1}
+	return CardConstr{Lits: negated, AtLeast: len(lits) - 1}
 }
 
 // Exactly1 returns two cardinality constraints stating that exactly one of the given lits must be true.
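This hunk fixes an aliasing bug rather than the encoding: the old AtMost1 negated the caller's slice in place, so the input literals were corrupted for any later use (Exactly1, for instance, hands the same literals to both AtLeast1 and AtMost1). The encoding itself is unchanged: at most one of n literals is true exactly when at least n-1 of their negations are true. A minimal sketch of the pitfall, independent of the solver internals (atMost1Buggy is a hypothetical stand-in for the old code):

```go
package main

import "fmt"

// atMost1Buggy mirrors the old vendored code: it negates its input in place.
func atMost1Buggy(lits []int) []int {
	for i, lit := range lits {
		lits[i] = -lit // mutates the caller's backing array
	}
	return lits
}

func main() {
	lits := []int{1, 2, 3}
	_ = atMost1Buggy(lits)
	// The caller's slice is now [-1 -2 -3] instead of [1 2 3], so building
	// a second constraint from lits would use the wrong signs.
	fmt.Println(lits)
}
```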
vendor/github.com/crillab/gophersat/solver/parser.go | 10 (generated, vendored)
@@ -55,17 +55,21 @@ func ParseSlice(cnf [][]int) *Problem {
 			return &pb
 		}
 	}
-	pb.simplify()
+	pb.simplify2()
 	return &pb
 }
 
+func isSpace(b byte) bool {
+	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
+}
+
 // readInt reads an int from r.
 // 'b' is the last read byte. It can be a space, a '-' or a digit.
 // The int can be negated.
 // All spaces before the int value are ignored.
 // Can return EOF.
 func readInt(b *byte, r *bufio.Reader) (res int, err error) {
-	for err == nil && (*b == ' ' || *b == '\t' || *b == '\n' || *b == '\r') {
+	for err == nil && isSpace(*b) {
 		*b, err = r.ReadByte()
 	}
 	if err == io.EOF {
@@ -88,7 +92,7 @@ func readInt(b *byte, r *bufio.Reader) (res int, err error) {
 		}
 		res = 10*res + int(*b-'0')
 		*b, err = r.ReadByte()
-		if *b == ' ' || *b == '\t' || *b == '\n' || *b == '\r' {
+		if isSpace(*b) {
 			break
 		}
 	}
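The rename to simplify2 and the new isSpace helper stay behind the same public entry point. A minimal sketch of driving this parsing path from client code, assuming the exported ParseSlice/New/Solve API of this vendored revision:

```go
package main

import (
	"fmt"

	"github.com/crillab/gophersat/solver"
)

func main() {
	// CNF as a slice: (x1 OR x2) AND (NOT x1); positive ints are variables,
	// negative ints their negations. ParseSlice runs the simplification above.
	pb := solver.ParseSlice([][]int{{1, 2}, {-1}})
	s := solver.New(pb)
	fmt.Println(s.Solve() == solver.Sat) // true: x2 = true, x1 = false works
}
```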
vendor/github.com/crillab/gophersat/solver/parser_pb.go | 14 (generated, vendored)
@@ -65,6 +65,14 @@ func ParseCardConstrs(constrs []CardConstr) *Problem {
 	return &pb
 }
 
+func (pb *Problem) appendClause(constr PBConstr) {
+	lits := make([]Lit, len(constr.Lits))
+	for j, val := range constr.Lits {
+		lits[j] = IntToLit(int32(val))
+	}
+	pb.Clauses = append(pb.Clauses, NewPBClause(lits, constr.Weights, constr.AtLeast))
+}
+
 // ParsePBConstrs parses and returns a PB problem from PBConstr values.
 func ParsePBConstrs(constrs []PBConstr) *Problem {
 	var pb Problem
@@ -100,11 +108,7 @@ func ParsePBConstrs(constrs []PBConstr) *Problem {
 				}
 			}
 		} else {
-			lits := make([]Lit, len(constr.Lits))
-			for j, val := range constr.Lits {
-				lits[j] = IntToLit(int32(val))
-			}
-			pb.Clauses = append(pb.Clauses, NewPBClause(lits, constr.Weights, card))
+			pb.appendClause(constr)
 		}
 	}
 	pb.Model = make([]decLevel, pb.NbVars)
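appendClause deduplicates the clause-building loop that ParseCardConstrs and ParsePBConstrs previously carried separately. A sketch of feeding cardinality constraints through this parser, using the CardConstr helpers from card.go above (again assuming this vendored revision's exported API):

```go
package main

import (
	"fmt"

	"github.com/crillab/gophersat/solver"
)

func main() {
	// Exactly one of x1, x2, x3: at least one true, at most one true.
	pb := solver.ParseCardConstrs([]solver.CardConstr{
		solver.AtLeast1(1, 2, 3),
		solver.AtMost1(1, 2, 3),
	})
	s := solver.New(pb)
	fmt.Println(s.Solve() == solver.Sat) // true: any single x_i alone satisfies it
}
```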
vendor/github.com/crillab/gophersat/solver/pb.go | 7 (generated, vendored)
@@ -57,12 +57,17 @@ func GtEq(lits []int, weights []int, n int) PBConstr {
 	if len(weights) != 0 && len(lits) != len(weights) {
 		panic("not as many lits as weights")
 	}
-	for i := range weights {
+	for i := 0; i < len(weights); i++ {
 		if weights[i] < 0 {
 			weights[i] = -weights[i]
 			n += weights[i]
 			lits[i] = -lits[i]
 		}
+		if weights[i] == 0 {
+			weights = append(weights[:i], weights[i+1:]...)
+			lits = append(lits[:i], lits[i+1:]...)
+			i--
+		}
 	}
 	return PBConstr{Lits: lits, Weights: weights, AtLeast: n}
 }
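Two things happen in this hunk. The range loop becomes an index loop because the new zero-weight pruning deletes elements in place and steps i back, which a range loop would not survive. The pre-existing negative-weight rewrite relies on the literals being 0/1 valued: a term w*x with w < 0 equals |w|*(1-x) - |w|, so flipping the literal and raising the threshold by |w| preserves the constraint. A worked instance as a sketch (GtEq and the PBConstr fields are the exported API shown above):

```go
package main

import (
	"fmt"

	"github.com/crillab/gophersat/solver"
)

func main() {
	// 3*x1 - 4*x2 >= 2, with x1, x2 in {0, 1}.
	// The w = -4 term rewrites as -4*x2 = 4*(1 - x2) - 4, so the
	// normalized constraint is 3*x1 + 4*(NOT x2) >= 6.
	c := solver.GtEq([]int{1, 2}, []int{3, -4}, 2)
	fmt.Println(c.Lits, c.Weights, c.AtLeast) // [1 -2] [3 4] 6
}
```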
vendor/github.com/crillab/gophersat/solver/solver.go | 14 (generated, vendored)
@@ -87,10 +87,16 @@ func New(problem *Problem) *Solver {
 		return &Solver{status: Unsat}
 	}
 	nbVars := problem.NbVars
+
+	trailCap := nbVars
+	if len(problem.Units) > trailCap {
+		trailCap = len(problem.Units)
+	}
+
 	s := &Solver{
 		nbVars:   nbVars,
 		status:   problem.Status,
-		trail:    make([]Lit, len(problem.Units), nbVars),
+		trail:    make([]Lit, len(problem.Units), trailCap),
 		model:    problem.Model,
 		activity: make([]float64, nbVars),
 		polarity: make([]bool, nbVars),
@@ -343,7 +349,7 @@ func (s *Solver) propagateAndSearch(lit Lit, lvl decLevel) Status {
 		return Indet
 	}
 	if s.Stats.NbConflicts >= s.wl.idxReduce*s.wl.nbMax {
-		s.wl.idxReduce = (s.Stats.NbConflicts / s.wl.nbMax) + 1
+		s.wl.idxReduce = s.Stats.NbConflicts/s.wl.nbMax + 1
 		s.reduceLearned()
 		s.bumpNbMax()
 	}
@@ -738,7 +744,7 @@ func (s *Solver) Optimal(results chan Result, stop chan struct{}) (res Result) {
 		copy(s.lastModel, s.model) // Save this model: it might be the last one
 		cost = 0
 		for i, lit := range s.minLits {
-			if (s.model[lit.Var()] > 0) == lit.IsPositive() {
+			if s.model[lit.Var()] > 0 == lit.IsPositive() {
 				if s.minWeights == nil {
 					cost++
 				} else {
@@ -803,7 +809,7 @@ func (s *Solver) Minimize() int {
 		copy(s.lastModel, s.model) // Save this model: it might be the last one
 		cost = 0
 		for i, lit := range s.minLits {
-			if (s.model[lit.Var()] > 0) == lit.IsPositive() {
+			if s.model[lit.Var()] > 0 == lit.IsPositive() {
 				if s.minWeights == nil {
 					cost++
 				} else {
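The trailCap computation guards against a runtime panic: make([]Lit, len, cap) panics whenever len exceeds cap, and a problem can arrive with more unit literals than variables (duplicate units, for example). A minimal sketch of the slice rule behind the fix:

```go
package main

import "fmt"

func main() {
	units, nbVars := 3, 2

	// Old code: make([]int, units, nbVars) panics here, since len 3 > cap 2.
	// New code grows the capacity to cover the unit literals first:
	trailCap := nbVars
	if units > trailCap {
		trailCap = units
	}
	trail := make([]int, units, trailCap)
	fmt.Println(len(trail), cap(trail)) // 3 3
}
```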
vendor/github.com/ecooper/qlearning/.gitignore | 24 (generated, vendored, new file)
@@ -0,0 +1,24 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
vendor/github.com/ecooper/qlearning/LICENSE | 21 (generated, vendored, new file)
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Eric Cooper
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
vendor/github.com/ecooper/qlearning/README.md | 72 (generated, vendored, new file)
@@ -0,0 +1,72 @@
+# qlearning
+
+The qlearning package provides a series of interfaces and utilities to implement
+the [Q-Learning](https://en.wikipedia.org/wiki/Q-learning) algorithm in
+Go.
+
+This project was largely inspired by [flappybird-qlearning-
+bot](https://github.com/chncyhn/flappybird-qlearning-bot).
+
+*Until a release is tagged, qlearning should be considered highly
+experimental and mostly a fun toy.*
+
+## Installation
+
+```shell
+$ go get https://github.com/ecooper/qlearning
+```
+
+## Quickstart
+
+qlearning provides example implementations in the [examples](examples/)
+directory of the project.
+
+[hangman.go](examples/hangman.go) provides a naive implementation of
+[Hangman](https://en.wikipedia.org/wiki/Hangman_(game)) for use with
+qlearning.
+
+```shell
+$ cd $GOPATH/src/github.com/ecooper/qlearning/examples
+$ go run hangman.go -h
+Usage of hangman:
+  -debug
+        Set debug
+  -games int
+        Play N games (default 5000000)
+  -progress int
+        Print progress messages every N games (default 1000)
+  -wordlist string
+        Path to a wordlist (default "./wordlist.txt")
+  -words int
+        Use N words from wordlist (default 10000)
+```
+
+By default, running [hangman.go](examples/hangman.go) will play millions
+of games against a 10,000-word corpus. That's a bit overkill for just
+trying out qlearning. You can run it against a smaller number of words
+for a few number of games using the `-games` and `-words` flags.
+
+```shell
+$ go run hangman.go -words 100 -progress 1000 -games 5000
+100 words loaded
+1000 games played: 92 WINS 908 LOSSES 9% WIN RATE
+2000 games played: 447 WINS 1553 LOSSES 36% WIN RATE
+3000 games played: 1064 WINS 1936 LOSSES 62% WIN RATE
+4000 games played: 1913 WINS 2087 LOSSES 85% WIN RATE
+5000 games played: 2845 WINS 2155 LOSSES 93% WIN RATE
+
+Agent performance: 5000 games played, 2845 WINS 2155 LOSSES 57% WIN RATE
+```
+
+"WIN RATE" per progress report is isolated within that cycle, a group of
+1000 games in this example. The win rate is meant to show the velocity
+of learning by the agent. If it is "learning", the win rate should be
+increasing until reaching convergence.
+
+As you can see, after 5000 games, the agent is able to "learn" and play
+hangman against a 100-word vocabulary.
+
+## Usage
+
+See [godocs](https://godoc.org/github.com/ecooper/qlearning) for the
+package documentation.
vendor/github.com/ecooper/qlearning/qlearning.go | 167 (generated, vendored, new file)
@@ -0,0 +1,167 @@
+// Package qlearning is an experimental set of interfaces and helpers to
+// implement the Q-learning algorithm in Go.
+//
+// This is highly experimental and should be considered a toy.
+//
+// See https://github.com/ecooper/qlearning/tree/master/examples for
+// implementation examples.
+package qlearning
+
+import (
+	"fmt"
+	"math/rand"
+	"time"
+)
+
+// State is an interface wrapping the current state of the model.
+type State interface {
+
+	// String returns a string representation of the given state.
+	// Implementers should take care to insure that this is a consistent
+	// hash for a given state.
+	String() string
+
+	// Next provides a slice of possible Actions that could be applied to
+	// a state.
+	Next() []Action
+}
+
+// Action is an interface wrapping an action that can be applied to the
+// model's current state.
+//
+// BUG (ecooper): A state should apply an action, not the other way
+// around.
+type Action interface {
+	String() string
+	Apply(State) State
+}
+
+// Rewarder is an interface wrapping the ability to provide a reward
+// for the execution of an action in a given state.
+type Rewarder interface {
+	// Reward calculates the reward value for a given action in a given
+	// state.
+	Reward(action *StateAction) float32
+}
+
+// Agent is an interface for a model's agent and is able to learn
+// from actions and return the current Q-value of an action at a given state.
+type Agent interface {
+	// Learn updates the model for a given state and action, using the
+	// provided Rewarder implementation.
+	Learn(*StateAction, Rewarder)
+
+	// Value returns the current Q-value for a State and Action.
+	Value(State, Action) float32
+
+	// Return a string representation of the Agent.
+	String() string
+}
+
+// StateAction is a struct grouping an action to a given State. Additionally,
+// a Value can be associated to StateAction, which is typically the Q-value.
+type StateAction struct {
+	State  State
+	Action Action
+	Value  float32
+}
+
+// NewStateAction creates a new StateAction for a State and Action.
+func NewStateAction(state State, action Action, val float32) *StateAction {
+	return &StateAction{
+		State:  state,
+		Action: action,
+		Value:  val,
+	}
+}
+
+// Next uses an Agent and State to find the highest scored Action.
+//
+// In the case of Q-value ties for a set of actions, a random
+// value is selected.
+func Next(agent Agent, state State) *StateAction {
+	best := make([]*StateAction, 0)
+	bestVal := float32(0.0)
+
+	for _, action := range state.Next() {
+		val := agent.Value(state, action)
+
+		if bestVal == float32(0.0) {
+			best = append(best, NewStateAction(state, action, val))
+			bestVal = val
+		} else {
+			if val > bestVal {
+				best = []*StateAction{NewStateAction(state, action, val)}
+				bestVal = val
+			} else if val == bestVal {
+				best = append(best, NewStateAction(state, action, val))
+			}
+		}
+	}
+
+	return best[rand.Intn(len(best))]
+}
+
+// SimpleAgent is an Agent implementation that stores Q-values in a
+// map of maps.
+type SimpleAgent struct {
+	q  map[string]map[string]float32
+	lr float32
+	d  float32
+}
+
+// NewSimpleAgent creates a SimpleAgent with the provided learning rate
+// and discount factor.
+func NewSimpleAgent(lr, d float32) *SimpleAgent {
+	return &SimpleAgent{
+		q:  make(map[string]map[string]float32),
+		d:  d,
+		lr: lr,
+	}
+}
+
+// getActions returns the current Q-values for a given state.
+func (agent *SimpleAgent) getActions(state string) map[string]float32 {
+	if _, ok := agent.q[state]; !ok {
+		agent.q[state] = make(map[string]float32)
+	}
+
+	return agent.q[state]
+}
+
+// Learn updates the existing Q-value for the given State and Action
+// using the Rewarder.
+//
+// See https://en.wikipedia.org/wiki/Q-learning#Algorithm
+func (agent *SimpleAgent) Learn(action *StateAction, reward Rewarder) {
+	current := action.State.String()
+	next := action.Action.Apply(action.State).String()
+
+	actions := agent.getActions(current)
+
+	maxNextVal := float32(0.0)
+	for _, v := range agent.getActions(next) {
+		if v > maxNextVal {
+			maxNextVal = v
+		}
+	}
+
+	currentVal := actions[action.Action.String()]
+	actions[action.Action.String()] = currentVal + agent.lr*(reward.Reward(action)+agent.d*maxNextVal-currentVal)
+}
+
+// Value gets the current Q-value for a State and Action.
+func (agent *SimpleAgent) Value(state State, action Action) float32 {
+	return agent.getActions(state.String())[action.String()]
+}
+
+// String returns the current Q-value map as a printed string.
+//
+// BUG (ecooper): This is useless.
+func (agent *SimpleAgent) String() string {
+	return fmt.Sprintf("%v", agent.q)
+}
+
+func init() {
+	rand.Seed(time.Now().UTC().UnixNano())
+}
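Learn above implements the textbook update Q(s,a) += lr * (r + d*max_a' Q(s',a') - Q(s,a)). A minimal, hypothetical implementation of the State, Action, and Rewarder interfaces against this vendored API (the walk/step/reward types below are illustrative only, not part of the package):

```go
package main

import (
	"fmt"

	"github.com/ecooper/qlearning"
)

// walk is a position on a line; the goal is to reach position 3.
type walk struct{ pos int }

func (w walk) String() string { return fmt.Sprintf("%d", w.pos) }

// Next offers two actions: step left or step right.
func (w walk) Next() []qlearning.Action {
	return []qlearning.Action{step{w, +1}, step{w, -1}}
}

// step moves the walk one position in dir.
type step struct {
	from walk
	dir  int
}

func (s step) String() string { return fmt.Sprintf("%+d", s.dir) }

func (s step) Apply(qlearning.State) qlearning.State {
	return walk{s.from.pos + s.dir}
}

// reward pays 1 for landing on the goal, 0 otherwise.
type reward struct{}

func (reward) Reward(sa *qlearning.StateAction) float32 {
	if sa.Action.Apply(sa.State).(walk).pos == 3 {
		return 1
	}
	return 0
}

func main() {
	agent := qlearning.NewSimpleAgent(0.5, 0.9) // learning rate, discount
	for episode := 0; episode < 100; episode++ {
		var s qlearning.State = walk{0}
		for i := 0; i < 10; i++ {
			sa := qlearning.Next(agent, s)
			agent.Learn(sa, reward{})
			s = sa.Action.Apply(s)
		}
	}
	fmt.Println(agent.Value(walk{2}, step{walk{2}, +1})) // should approach 1
}
```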
vendor/modules.txt | 6 (vendored)
@@ -49,7 +49,7 @@ github.com/containerd/continuity/pathdriver
 github.com/containerd/continuity/syscallx
 # github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d
 github.com/cpuguy83/go-md2man/v2/md2man
-# github.com/crillab/gophersat v1.1.7
+# github.com/crillab/gophersat v1.1.9-0.20200211102949-9a8bf7f2f0a3
 github.com/crillab/gophersat/bf
 github.com/crillab/gophersat/solver
 # github.com/cyphar/filepath-securejoin v0.2.2
@@ -93,6 +93,8 @@ github.com/docker/go-units
 github.com/docker/libnetwork/ipamutils
 # github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7
 github.com/docker/libtrust
+# github.com/ecooper/qlearning v0.0.0-20160612200101-3075011a69fd
+github.com/ecooper/qlearning
 # github.com/fatih/color v1.7.0
 github.com/fatih/color
 # github.com/fsnotify/fsnotify v1.4.7
@@ -263,8 +265,8 @@ go.uber.org/multierr
 go.uber.org/tools/update-license
 # go.uber.org/zap v1.13.0
 go.uber.org/zap
-go.uber.org/zap/internal/bufferpool
 go.uber.org/zap/zapcore
+go.uber.org/zap/internal/bufferpool
 go.uber.org/zap/buffer
 go.uber.org/zap/internal/color
 go.uber.org/zap/internal/exit