Mirror of https://github.com/mudler/luet.git (synced 2025-08-27 19:38:49 +00:00)
Parent: 711c039296
Commit: 2f6bef14d5
@@ -8,7 +8,7 @@ import (
"strconv"
"strings"

"github.com/mudler/gophersat/solver"
"github.com/crillab/gophersat/solver"
)

// A Formula is any kind of boolean formula, not necessarily in CNF.
@@ -72,8 +72,8 @@ type trueConst struct{}
// True is the constant denoting a tautology.
var True Formula = trueConst{}

func (t trueConst) nnf() Formula { return t }
func (t trueConst) String() string { return "⊤" }
func (t trueConst) nnf() Formula { return t }
func (t trueConst) String() string { return "⊤" }
func (t trueConst) Eval(model map[string]bool) bool { return true }

// The "false" constant.
@@ -82,8 +82,8 @@ type falseConst struct{}
// False is the constant denoting a contradiction.
var False Formula = falseConst{}

func (f falseConst) nnf() Formula { return f }
func (f falseConst) String() string { return "⊥" }
func (f falseConst) nnf() Formula { return f }
func (f falseConst) String() string { return "⊥" }
func (f falseConst) Eval(model map[string]bool) bool { return false }

// Var generates a named boolean variable in a formula.
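For context on the package whose import path changes above, here is a minimal, hypothetical usage sketch of the `bf` package. Only `Formula`, `Var`, `True`, `False`, `Eval` and `String` appear in this hunk; `And`, `Or`, `Not` and `Solve` are assumed from the upstream crillab/gophersat API and are not confirmed by this diff.

```go
package main

import (
	"fmt"

	"github.com/crillab/gophersat/bf"
)

func main() {
	// (a ∨ b) ∧ ¬a: satisfiable, e.g. with a=false, b=true.
	f := bf.And(bf.Or(bf.Var("a"), bf.Var("b")), bf.Not(bf.Var("a")))
	model := bf.Solve(f) // assumed to return a model map, or nil when unsatisfiable
	fmt.Println(model)
}
```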
@@ -15,11 +15,10 @@ func AtLeast1(lits ...int) CardConstr {

// AtMost1 returns a cardinality constraint stating that at most one of the given lits can be true.
func AtMost1(lits ...int) CardConstr {
negated := make([]int, len(lits))
for i, lit := range lits {
negated[i] = -lit
lits[i] = -lit
}
return CardConstr{Lits: negated, AtLeast: len(lits) - 1}
return CardConstr{Lits: lits, AtLeast: len(lits) - 1}
}

// Exactly1 returns two cardinality constraints stating that exactly one of the given lits must be true.
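Both versions above encode the same constraint: "at most one of lits is true" is expressed as "at least len(lits)-1 of the negated literals are true". The removed variant copies the negations into a fresh `negated` slice, while the restored upstream variant negates `lits` in place, which also mutates the caller's slice when an existing slice is expanded with `lits...`. A self-contained sketch of that difference (the local type only mirrors the `CardConstr` shown above; it is not the package's own):

```go
package main

import "fmt"

// cardConstr mirrors the CardConstr used in the hunk above, for illustration only.
type cardConstr struct {
	Lits    []int
	AtLeast int
}

// atMost1 follows the upstream variant: negate in place, require n-1 negations to hold.
func atMost1(lits ...int) cardConstr {
	for i, lit := range lits {
		lits[i] = -lit
	}
	return cardConstr{Lits: lits, AtLeast: len(lits) - 1}
}

func main() {
	xs := []int{1, 2, 3}
	c := atMost1(xs...)
	fmt.Println(c)  // {[-1 -2 -3] 2}: at least two of ¬x1, ¬x2, ¬x3 must hold
	fmt.Println(xs) // [-1 -2 -3]: the caller's slice was negated in place
}
```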
@@ -55,21 +55,17 @@ func ParseSlice(cnf [][]int) *Problem {
return &pb
}
}
pb.simplify2()
pb.simplify()
return &pb
}

func isSpace(b byte) bool {
return b == ' ' || b == '\t' || b == '\n' || b == '\r'
}

// readInt reads an int from r.
// 'b' is the last read byte. It can be a space, a '-' or a digit.
// The int can be negated.
// All spaces before the int value are ignored.
// Can return EOF.
func readInt(b *byte, r *bufio.Reader) (res int, err error) {
for err == nil && isSpace(*b) {
for err == nil && (*b == ' ' || *b == '\t' || *b == '\n' || *b == '\r') {
*b, err = r.ReadByte()
}
if err == io.EOF {
@@ -92,7 +88,7 @@ func readInt(b *byte, r *bufio.Reader) (res int, err error) {
}
res = 10*res + int(*b-'0')
*b, err = r.ReadByte()
if isSpace(*b) {
if *b == ' ' || *b == '\t' || *b == '\n' || *b == '\r' {
break
}
}
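For orientation, a hypothetical end-to-end sketch of the parsing entry point touched above: `ParseSlice` builds a `Problem` from integer clauses in the DIMACS convention (a positive int is a variable, a negative int its negation, exactly what `readInt` extracts from text). `New` and `Solve` are assumed from the same solver package and are not part of this hunk.

```go
package main

import (
	"fmt"

	"github.com/crillab/gophersat/solver"
)

func main() {
	// (x1 ∨ x2) ∧ (¬x1 ∨ x3) ∧ (¬x2 ∨ ¬x3)
	pb := solver.ParseSlice([][]int{{1, 2}, {-1, 3}, {-2, -3}})
	s := solver.New(pb)
	fmt.Println(s.Solve()) // expected to report SAT for this formula
}
```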
@@ -65,14 +65,6 @@ func ParseCardConstrs(constrs []CardConstr) *Problem {
return &pb
}

func (pb *Problem) appendClause(constr PBConstr) {
lits := make([]Lit, len(constr.Lits))
for j, val := range constr.Lits {
lits[j] = IntToLit(int32(val))
}
pb.Clauses = append(pb.Clauses, NewPBClause(lits, constr.Weights, constr.AtLeast))
}

// ParsePBConstrs parses and returns a PB problem from PBConstr values.
func ParsePBConstrs(constrs []PBConstr) *Problem {
var pb Problem
@@ -108,7 +100,11 @@ func ParsePBConstrs(constrs []PBConstr) *Problem {
}
}
} else {
pb.appendClause(constr)
lits := make([]Lit, len(constr.Lits))
for j, val := range constr.Lits {
lits[j] = IntToLit(int32(val))
}
pb.Clauses = append(pb.Clauses, NewPBClause(lits, constr.Weights, card))
}
}
pb.Model = make([]decLevel, pb.NbVars)
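The inlined block above turns each `PBConstr` into a clause through `IntToLit` and `NewPBClause`, which is what the removed `appendClause` helper did. A short, hypothetical sketch of feeding such pseudo-boolean constraints to `ParsePBConstrs`; only the `Lits`, `Weights` and `AtLeast` fields visible in this diff are relied on, and `New`/`Solve` are assumed as before.

```go
package main

import (
	"fmt"

	"github.com/crillab/gophersat/solver"
)

func main() {
	// 2·x1 + x2 + x3 ≥ 2: satisfied by x1 alone, or by x2 and x3 together.
	pb := solver.ParsePBConstrs([]solver.PBConstr{
		{Lits: []int{1, 2, 3}, Weights: []int{2, 1, 1}, AtLeast: 2},
	})
	fmt.Println(solver.New(pb).Solve())
}
```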
@@ -87,16 +87,10 @@ func New(problem *Problem) *Solver {
return &Solver{status: Unsat}
}
nbVars := problem.NbVars

trailCap := nbVars
if len(problem.Units) > trailCap {
trailCap = len(problem.Units)
}

s := &Solver{
nbVars: nbVars,
status: problem.Status,
trail: make([]Lit, len(problem.Units), trailCap),
trail: make([]Lit, len(problem.Units), nbVars),
model: problem.Model,
activity: make([]float64, nbVars),
polarity: make([]bool, nbVars),
@@ -349,7 +343,7 @@ func (s *Solver) propagateAndSearch(lit Lit, lvl decLevel) Status {
return Indet
}
if s.Stats.NbConflicts >= s.wl.idxReduce*s.wl.nbMax {
s.wl.idxReduce = s.Stats.NbConflicts/s.wl.nbMax + 1
s.wl.idxReduce = (s.Stats.NbConflicts / s.wl.nbMax) + 1
s.reduceLearned()
s.bumpNbMax()
}
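The removed `trailCap` block sized the trail's capacity as the larger of `nbVars` and `len(problem.Units)`. A plausible reason (an inference, not stated in this diff): `make([]T, length, capacity)` panics at run time when length exceeds capacity, so the guard keeps `New` safe for problems carrying more unit literals than variables. A minimal illustration of the pattern:

```go
package main

import "fmt"

func main() {
	nbVars := 2
	units := []int{1, -1, 2} // more unit literals than variables

	trailCap := nbVars
	if len(units) > trailCap {
		trailCap = len(units)
	}
	trail := make([]int, len(units), trailCap) // make([]int, 3, 2) would panic instead
	fmt.Println(len(trail), cap(trail))        // 3 3
}
```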
@@ -744,7 +738,7 @@ func (s *Solver) Optimal(results chan Result, stop chan struct{}) (res Result) {
copy(s.lastModel, s.model) // Save this model: it might be the last one
cost = 0
for i, lit := range s.minLits {
if s.model[lit.Var()] > 0 == lit.IsPositive() {
if (s.model[lit.Var()] > 0) == lit.IsPositive() {
if s.minWeights == nil {
cost++
} else {
@@ -809,7 +803,7 @@ func (s *Solver) Minimize() int {
copy(s.lastModel, s.model) // Save this model: it might be the last one
cost = 0
for i, lit := range s.minLits {
if s.model[lit.Var()] > 0 == lit.IsPositive() {
if (s.model[lit.Var()] > 0) == lit.IsPositive() {
if s.minWeights == nil {
cost++
} else {
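The parenthesisation added in `Optimal` and `Minimize` above is purely cosmetic: Go's comparison operators share one precedence level and associate left to right, so both spellings evaluate `s.model[lit.Var()] > 0` first and then compare the result with `lit.IsPositive()`. A two-line check:

```go
package main

import "fmt"

func main() {
	x, pos := 3, true
	fmt.Println(x > 0 == pos, (x > 0) == pos) // both print true: identical parses
}
```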
24 vendor/github.com/ecooper/qlearning/.gitignore (generated, vendored)
@@ -1,24 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so

# Folders
_obj
_test

# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out

*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*

_testmain.go

*.exe
*.test
*.prof
21 vendor/github.com/ecooper/qlearning/LICENSE (generated, vendored)
@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2016 Eric Cooper

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
72 vendor/github.com/ecooper/qlearning/README.md (generated, vendored)
@@ -1,72 +0,0 @@
# qlearning

The qlearning package provides a series of interfaces and utilities to implement
the [Q-Learning](https://en.wikipedia.org/wiki/Q-learning) algorithm in
Go.

This project was largely inspired by [flappybird-qlearning-
bot](https://github.com/chncyhn/flappybird-qlearning-bot).

*Until a release is tagged, qlearning should be considered highly
experimental and mostly a fun toy.*

## Installation

```shell
$ go get https://github.com/ecooper/qlearning
```

## Quickstart

qlearning provides example implementations in the [examples](examples/)
directory of the project.

[hangman.go](examples/hangman.go) provides a naive implementation of
[Hangman](https://en.wikipedia.org/wiki/Hangman_(game)) for use with
qlearning.

```shell
$ cd $GOPATH/src/github.com/ecooper/qlearning/examples
$ go run hangman.go -h
Usage of hangman:
-debug
Set debug
-games int
Play N games (default 5000000)
-progress int
Print progress messages every N games (default 1000)
-wordlist string
Path to a wordlist (default "./wordlist.txt")
-words int
Use N words from wordlist (default 10000)
```

By default, running [hangman.go](examples/hangman.go) will play millions
of games against a 10,000-word corpus. That's a bit overkill for just
trying out qlearning. You can run it against a smaller number of words
for a few number of games using the `-games` and `-words` flags.

```shell
$ go run hangman.go -words 100 -progress 1000 -games 5000
100 words loaded
1000 games played: 92 WINS 908 LOSSES 9% WIN RATE
2000 games played: 447 WINS 1553 LOSSES 36% WIN RATE
3000 games played: 1064 WINS 1936 LOSSES 62% WIN RATE
4000 games played: 1913 WINS 2087 LOSSES 85% WIN RATE
5000 games played: 2845 WINS 2155 LOSSES 93% WIN RATE

Agent performance: 5000 games played, 2845 WINS 2155 LOSSES 57% WIN RATE
```

"WIN RATE" per progress report is isolated within that cycle, a group of
1000 games in this example. The win rate is meant to show the velocity
of learning by the agent. If it is "learning", the win rate should be
increasing until reaching convergence.

As you can see, after 5000 games, the agent is able to "learn" and play
hangman against a 100-word vocabulary.

## Usage

See [godocs](https://godoc.org/github.com/ecooper/qlearning) for the
package documentation.
167 vendor/github.com/ecooper/qlearning/qlearning.go (generated, vendored)
@@ -1,167 +0,0 @@
// Package qlearning is an experimental set of interfaces and helpers to
// implement the Q-learning algorithm in Go.
//
// This is highly experimental and should be considered a toy.
//
// See https://github.com/ecooper/qlearning/tree/master/examples for
// implementation examples.
package qlearning

import (
"fmt"
"math/rand"
"time"
)

// State is an interface wrapping the current state of the model.
type State interface {

// String returns a string representation of the given state.
// Implementers should take care to insure that this is a consistent
// hash for a given state.
String() string

// Next provides a slice of possible Actions that could be applied to
// a state.
Next() []Action
}

// Action is an interface wrapping an action that can be applied to the
// model's current state.
//
// BUG (ecooper): A state should apply an action, not the other way
// around.
type Action interface {
String() string
Apply(State) State
}

// Rewarder is an interface wrapping the ability to provide a reward
// for the execution of an action in a given state.
type Rewarder interface {
// Reward calculates the reward value for a given action in a given
// state.
Reward(action *StateAction) float32
}

// Agent is an interface for a model's agent and is able to learn
// from actions and return the current Q-value of an action at a given state.
type Agent interface {
// Learn updates the model for a given state and action, using the
// provided Rewarder implementation.
Learn(*StateAction, Rewarder)

// Value returns the current Q-value for a State and Action.
Value(State, Action) float32

// Return a string representation of the Agent.
String() string
}

// StateAction is a struct grouping an action to a given State. Additionally,
// a Value can be associated to StateAction, which is typically the Q-value.
type StateAction struct {
State State
Action Action
Value float32
}

// NewStateAction creates a new StateAction for a State and Action.
func NewStateAction(state State, action Action, val float32) *StateAction {
return &StateAction{
State: state,
Action: action,
Value: val,
}
}

// Next uses an Agent and State to find the highest scored Action.
//
// In the case of Q-value ties for a set of actions, a random
// value is selected.
func Next(agent Agent, state State) *StateAction {
best := make([]*StateAction, 0)
bestVal := float32(0.0)

for _, action := range state.Next() {
val := agent.Value(state, action)

if bestVal == float32(0.0) {
best = append(best, NewStateAction(state, action, val))
bestVal = val
} else {
if val > bestVal {
best = []*StateAction{NewStateAction(state, action, val)}
bestVal = val
} else if val == bestVal {
best = append(best, NewStateAction(state, action, val))
}
}
}

return best[rand.Intn(len(best))]
}

// SimpleAgent is an Agent implementation that stores Q-values in a
// map of maps.
type SimpleAgent struct {
q map[string]map[string]float32
lr float32
d float32
}

// NewSimpleAgent creates a SimpleAgent with the provided learning rate
// and discount factor.
func NewSimpleAgent(lr, d float32) *SimpleAgent {
return &SimpleAgent{
q: make(map[string]map[string]float32),
d: d,
lr: lr,
}
}

// getActions returns the current Q-values for a given state.
func (agent *SimpleAgent) getActions(state string) map[string]float32 {
if _, ok := agent.q[state]; !ok {
agent.q[state] = make(map[string]float32)
}

return agent.q[state]
}

// Learn updates the existing Q-value for the given State and Action
// using the Rewarder.
//
// See https://en.wikipedia.org/wiki/Q-learning#Algorithm
func (agent *SimpleAgent) Learn(action *StateAction, reward Rewarder) {
current := action.State.String()
next := action.Action.Apply(action.State).String()

actions := agent.getActions(current)

maxNextVal := float32(0.0)
for _, v := range agent.getActions(next) {
if v > maxNextVal {
maxNextVal = v
}
}

currentVal := actions[action.Action.String()]
actions[action.Action.String()] = currentVal + agent.lr*(reward.Reward(action)+agent.d*maxNextVal-currentVal)
}

// Value gets the current Q-value for a State and Action.
func (agent *SimpleAgent) Value(state State, action Action) float32 {
return agent.getActions(state.String())[action.String()]
}

// String returns the current Q-value map as a printed string.
//
// BUG (ecooper): This is useless.
func (agent *SimpleAgent) String() string {
return fmt.Sprintf("%v", agent.q)
}

func init() {
rand.Seed(time.Now().UTC().UnixNano())
}
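For reference, a hypothetical sketch of the API being dropped from vendor/ here. It uses only identifiers visible in the deleted file; the trivial coin-guess "environment" is invented for illustration and is not part of the package.

```go
package main

import (
	"fmt"

	"github.com/ecooper/qlearning"
)

// guessState is a single-state toy game: the agent picks "heads" or "tails".
type guessState struct{}

func (guessState) String() string { return "start" }
func (guessState) Next() []qlearning.Action {
	return []qlearning.Action{guess("heads"), guess("tails")}
}

// guess is an Action naming the agent's choice; applying it leaves the state unchanged.
type guess string

func (g guess) String() string                          { return string(g) }
func (g guess) Apply(s qlearning.State) qlearning.State { return s }

// reward pays +1 for "heads" and -1 otherwise.
type reward struct{}

func (reward) Reward(sa *qlearning.StateAction) float32 {
	if sa.Action.String() == "heads" {
		return 1
	}
	return -1
}

func main() {
	agent := qlearning.NewSimpleAgent(0.5, 0.9) // learning rate, discount factor
	for i := 0; i < 20; i++ {
		sa := qlearning.Next(agent, guessState{}) // pick the highest-valued action
		agent.Learn(sa, reward{})                 // apply the Q-learning update
	}
	fmt.Println(agent) // dump of the learned Q-values per state and action
}
```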
10 vendor/modules.txt (vendored)
@@ -49,6 +49,9 @@ github.com/containerd/continuity/pathdriver
github.com/containerd/continuity/syscallx
# github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d
github.com/cpuguy83/go-md2man/v2/md2man
# github.com/crillab/gophersat v1.1.7
github.com/crillab/gophersat/bf
github.com/crillab/gophersat/solver
# github.com/cyphar/filepath-securejoin v0.2.2
github.com/cyphar/filepath-securejoin
# github.com/docker/distribution v2.7.0+incompatible
@@ -90,8 +93,6 @@ github.com/docker/go-units
github.com/docker/libnetwork/ipamutils
# github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7
github.com/docker/libtrust
# github.com/ecooper/qlearning v0.0.0-20160612200101-3075011a69fd
github.com/ecooper/qlearning
# github.com/fatih/color v1.7.0
github.com/fatih/color
# github.com/fsnotify/fsnotify v1.4.7
@@ -156,9 +157,6 @@ github.com/mattn/go-isatty
github.com/mitchellh/mapstructure
# github.com/mudler/docker-companion v0.4.6-0.20191110154655-b8b364100616
github.com/mudler/docker-companion/api
# github.com/mudler/gophersat v1.1.5-0.20200211080010-645700b4b7c0
github.com/mudler/gophersat/bf
github.com/mudler/gophersat/solver
# github.com/onsi/ginkgo v1.10.1
github.com/onsi/ginkgo
github.com/onsi/ginkgo/config
@@ -265,8 +263,8 @@ go.uber.org/multierr
go.uber.org/tools/update-license
# go.uber.org/zap v1.13.0
go.uber.org/zap
go.uber.org/zap/zapcore
go.uber.org/zap/internal/bufferpool
go.uber.org/zap/zapcore
go.uber.org/zap/buffer
go.uber.org/zap/internal/color
go.uber.org/zap/internal/exit