// Copyright © 2020 Ettore Di Giacinto <mudler@gentoo.org>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, see <http://www.gnu.org/licenses/>.

package solver

import (
|
2020-02-11 13:52:24 +00:00
|
|
|
"encoding/json"
|
2020-02-10 16:16:35 +00:00
|
|
|
"fmt"
|
2020-02-11 08:51:52 +00:00
|
|
|
"strconv"
|
2020-02-10 16:16:35 +00:00
|
|
|
|
2020-02-11 13:58:17 +00:00
|
|
|
"github.com/crillab/gophersat/bf"
|
2020-02-11 13:52:24 +00:00
|
|
|
"github.com/mudler/luet/pkg/helpers"
|
|
|
|
"gopkg.in/yaml.v2"
|
|
|
|
|
2020-02-10 16:16:35 +00:00
|
|
|
"github.com/ecooper/qlearning"
|
|
|
|
pkg "github.com/mudler/luet/pkg/package"
|
2020-02-10 08:41:09 +00:00
|
|
|
"github.com/pkg/errors"
|
|
|
|
)

type ActionType int

const (
	NoAction = 0
	Solved = iota
	NoSolution = iota
	Going = iota
	ActionRemoved = iota
	ActionAdded = iota

	DoNoop = false

	ActionDomains = 3 // Bump it if you increase the number of actions

	DefaultMaxAttempts = 9000
	DefaultLearningRate = 0.7
	DefaultDiscount = 1.0
	DefaultInitialObserved = 999999

	QLearningResolverType = "qlearning"
)

//. "github.com/mudler/luet/pkg/logger"

// PackageResolver assists PackageSolver on unsat cases
type PackageResolver interface {
	Solve(bf.Formula, PackageSolver) (PackagesAssertions, error)
}

type DummyPackageResolver struct{}

func (*DummyPackageResolver) Solve(bf.Formula, PackageSolver) (PackagesAssertions, error) {
	return nil, errors.New("Could not satisfy the constraints. Try again by removing deps")
}

type QLearningResolver struct {
	Attempts int // maximum number of attempts (the budget)

	ToAttempt int // number of permutations left to try

	attempts int // running countdown, decremented on each Choose

	Attempted map[string]bool // marks the (package, action) pairs already tried

	Solver  PackageSolver
	Formula bf.Formula

	Targets []pkg.Package // the original wanted targets
	Current []pkg.Package

	observedDelta       int           // smallest (targets - wanted) delta observed so far
	observedDeltaChoice []pkg.Package // wanted list that produced observedDelta

	Agent *qlearning.SimpleAgent
}

// SimpleQLearningSolver returns a QLearningResolver with the default
// learning rate, discount, attempt budget, and initial observed delta.
func SimpleQLearningSolver() PackageResolver {
	return NewQLearningResolver(DefaultLearningRate, DefaultDiscount, DefaultMaxAttempts, DefaultInitialObserved)
}

// NewQLearningResolver returns a PackageResolver driven by a Q-learning
// agent. The defaults are LearningRate 0.7 and Discount 1.0.
func NewQLearningResolver(LearningRate, Discount float32, MaxAttempts, initialObservedDelta int) PackageResolver {
	return &QLearningResolver{
		Agent:         qlearning.NewSimpleAgent(LearningRate, Discount),
		observedDelta: initialObservedDelta,
		Attempts:      MaxAttempts,
	}
}
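
// A minimal usage sketch (illustrative only; it assumes `s` is a concrete
// *Solver and `f` a bf.Formula of its that turned out to be unsat):
//
//	resolver := SimpleQLearningSolver()
//	assertions, err := resolver.Solve(f, s)
//	if err != nil {
//		// no relaxation of the wanted targets made the formula sat
//	}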

// Solve tries to relax the wanted targets via Q-learning until the formula
// becomes sat, then delegates to the underlying solver.
func (resolver *QLearningResolver) Solve(f bf.Formula, s PackageSolver) (PackagesAssertions, error) {
	// Info("Using QLearning solver to resolve conflicts. Please be patient.")
	resolver.Solver = s

	s.SetResolver(&DummyPackageResolver{}) // Set a dummy resolver, otherwise each attempt would recurse into a new QLearning run
	defer s.SetResolver(resolver)          // Set ourselves back as the resolver when done

	resolver.Formula = f

	// By default our agent has a learning rate of 0.7 and a discount of 1.0.
	if resolver.Agent == nil {
		resolver.Agent = qlearning.NewSimpleAgent(DefaultLearningRate, DefaultDiscount) // FIXME: Remove hardcoded values
	}

	// ActionDomains (3) counts the action kinds, including noop whether or not it is enabled.
	// Compute the number of permutations to attempt.
	resolver.ToAttempt = int(helpers.Factorial(uint64(len(resolver.Solver.(*Solver).Wanted)-1) * ActionDomains)) // TODO: type assertions must go away
	resolver.Targets = resolver.Solver.(*Solver).Wanted
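	// Worked example (hypothetical numbers): with 3 wanted packages the
	// argument is (3-1)*3 = 6, so ToAttempt = 6! = 720 permutations.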

	resolver.attempts = resolver.Attempts

	resolver.Attempted = make(map[string]bool, len(resolver.Targets))

	for resolver.IsComplete() == Going {
		// Pick the next move, which is going to be a package choice.
		action := qlearning.Next(resolver.Agent, resolver)

		// Whatever that choice is, update our model for its impact.
		// If the chosen package makes the formula sat, the action
		// scores positively. Otherwise, it scores negatively.
		resolver.Agent.Learn(action, resolver)

		// Reward doesn't change state, so we could check what the
		// reward would be for this action and report how the env changed.
		// score := resolver.Reward(action)
		// if score > 0.0 {
		// 	resolver.Log("%s was correct", action.Action.String())
		// } else {
		// 	resolver.Log("%s was incorrect", action.Action.String())
		// }
	}

	// If we got a good result, take it.
	// Also take the result if we reached the overall maximum attempts.
	if resolver.IsComplete() == Solved || resolver.IsComplete() == NoSolution {

		if len(resolver.observedDeltaChoice) != 0 {
			// Take the choice with the minimum observed delta, and consume it (Try sets the wanted list)
			resolver.Solver.(*Solver).Wanted = resolver.observedDeltaChoice
		}

		return resolver.Solver.Solve()
	}

	return nil, errors.New("QLearning resolver failed")
}

// IsComplete returns the resolver state: NoSolution once the attempt
// budget is exhausted, Going while permutations are left to try, and
// Solved otherwise.
func (resolver *QLearningResolver) IsComplete() int {
	if resolver.attempts < 1 {
		return NoSolution
	}

	if resolver.ToAttempt > 0 {
		return Going
	}

	return Solved
}

// Try applies the given choice to the solver's wanted list and attempts
// to solve; the returned error is nil when the formula is sat.
func (resolver *QLearningResolver) Try(c Choice) error {
	pack := c.Package
	packtoAdd := pkg.FromString(pack)
	resolver.Attempted[pack+strconv.Itoa(int(c.Action))] = true // mark this choice as attempted
	s, _ := resolver.Solver.(*Solver)
	var filtered []pkg.Package

	switch c.Action {
	case ActionAdded:
		found := false
		for _, p := range s.Wanted {
			if p.String() == pack {
				found = true
				break
			}
		}
		if !found {
			resolver.Solver.(*Solver).Wanted = append(resolver.Solver.(*Solver).Wanted, packtoAdd)
		}

	case ActionRemoved:
		for _, p := range s.Wanted {
			if p.String() != pack {
				filtered = append(filtered, p)
			}
		}

		resolver.Solver.(*Solver).Wanted = filtered
	}

	_, err := resolver.Solver.Solve()

	return err
}

// Choose applies a package attempt, returning
// true if the formula became sat.
//
// Choose updates the resolver's state.
func (resolver *QLearningResolver) Choose(c Choice) bool {
	//pack := pkg.FromString(c.Package)

	err := resolver.Try(c)

	if err == nil {
		resolver.ToAttempt--
		resolver.attempts-- // Decrease attempts; it acts as a barrier. We could also skip decreasing it here, allowing more attempts to be made
	} else {
		resolver.attempts--
		return false
	}

	return true
}

// Reward returns a score for a given qlearning.StateAction. Reward is a
// member of the qlearning.Rewarder interface. If the choice makes the
// formula sat, a positive score is returned; otherwise a static -1000.
func (resolver *QLearningResolver) Reward(action *qlearning.StateAction) float32 {
	choice := action.Action.(*Choice)

	//_, err := resolver.Solver.Solve()
	err := resolver.Try(*choice)

	toBeInstalled := len(resolver.Solver.(*Solver).Wanted)
	originalTarget := len(resolver.Targets)
	noaction := choice.Action == NoAction
	delta := originalTarget - toBeInstalled

	if err == nil {
		// if toBeInstalled == originalTarget { // Base case: all the targets match (it shouldn't happen, but let's give a higher score)
		// 	Debug("Target match, maximum score")
		// 	return 24.0 / float32(len(resolver.Attempted))
		// }
		if DoNoop {
			if noaction && toBeInstalled == 0 { // We decided to stay in the current state, and no targets have been chosen
				return -100
			}
		}

		if delta <= resolver.observedDelta { // Track the minimum delta: drop as few targets as possible
			resolver.observedDelta = delta
			resolver.observedDeltaChoice = resolver.Solver.(*Solver).Wanted // store it, as this is our return value at the end
			return 24.0 / float32(len(resolver.Attempted))
		} else if toBeInstalled > 0 { // If we installed something, at least give a good score
			return 24.0 / float32(len(resolver.Attempted))
		}
	}

	return -1000
}
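
// A worked example of the bookkeeping above (hypothetical numbers): with 4
// original targets and 3 packages still wanted after a removal, delta is
// 4 - 3 = 1. If that is the smallest (<=) delta seen so far, it becomes
// observedDelta and the current wanted list is stored as the best candidate
// relaxation, later consumed by Solve.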

// Next creates a new slice of qlearning.Action instances. One action is
// created for each target: a removal if the package is currently in the
// wanted list, an addition otherwise.
func (resolver *QLearningResolver) Next() []qlearning.Action {
	actions := make([]qlearning.Action, 0, (len(resolver.Targets)-1)*ActionDomains)

TARGETS:
	for _, pack := range resolver.Targets {
		for _, current := range resolver.Solver.(*Solver).Wanted {
			if current.String() == pack.String() {
				actions = append(actions, &Choice{Package: pack.String(), Action: ActionRemoved})
				continue TARGETS
			}
		}
		actions = append(actions, &Choice{Package: pack.String(), Action: ActionAdded})
	}

	if DoNoop {
		actions = append(actions, &Choice{Package: "", Action: NoAction}) // NOOP
	}

	return actions
}

// Log is a wrapper around fmt.Printf that prefixes the message with the
// number of moves made and the remaining attempts.
func (resolver *QLearningResolver) Log(msg string, args ...interface{}) {
	logMsg := fmt.Sprintf("(%d moves, %d remaining attempts) %s\n", len(resolver.Attempted), resolver.attempts, msg)
	fmt.Printf(logMsg, args...)
}

// String returns a representation of the current env state (the wanted
// list), used as the state key by a qlearning.Agent.
func (resolver *QLearningResolver) String() string {
	return fmt.Sprintf("%v", resolver.Solver.(*Solver).Wanted)
}

// Choice implements qlearning.Action for a package choice: an addition
// to, or a removal from, the wanted targets. The yaml tags mirror the
// json ones so that ChoiceFromString can parse the output of String.
type Choice struct {
	Package string     `json:"pack" yaml:"pack"`
	Action  ActionType `json:"action" yaml:"action"`
}

// ChoiceFromString builds a Choice back from its serialized form. It
// unmarshals with YAML, which also accepts the JSON produced by
// Choice.String, since JSON documents are valid YAML.
func ChoiceFromString(s string) (*Choice, error) {
	var p *Choice
	err := yaml.Unmarshal([]byte(s), &p)
	if err != nil {
		return nil, err
	}
	return p, nil
}

// String returns the JSON serialization of the current choice.
func (choice *Choice) String() string {
	data, err := json.Marshal(choice)
	if err != nil {
		return ""
	}
	return string(data)
}
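
// Round-trip sketch (illustrative): a Choice serialized with String can be
// rebuilt with ChoiceFromString:
//
//	c := &Choice{Package: "foo/bar", Action: ActionRemoved}
//	back, err := ChoiceFromString(c.String())
//	// back.Package == "foo/bar", back.Action == ActionRemoved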

// Apply updates the state of the solver for the package choice.
func (choice *Choice) Apply(state qlearning.State) qlearning.State {
	resolver := state.(*QLearningResolver)
	resolver.Choose(*choice)

	return resolver
}