Support Add/Removal actions

Also keep the list of the wanted targets as we walk it. We will let the agent try different solutions.
This commit is contained in:
Ettore Di Giacinto 2020-02-11 09:51:52 +01:00
parent d4255b086b
commit ac6554c291
No known key found for this signature in database
GPG Key ID: 1ADA699B145A2D1C

View File

@ -17,17 +17,22 @@ package solver
import ( import (
"fmt" "fmt"
"strconv"
"github.com/mudler/gophersat/bf"
"github.com/ecooper/qlearning" "github.com/ecooper/qlearning"
"github.com/mudler/gophersat/bf"
pkg "github.com/mudler/luet/pkg/package" pkg "github.com/mudler/luet/pkg/package"
"github.com/pkg/errors" "github.com/pkg/errors"
) )
type ActionType int
const ( const (
Solved = 1 Solved = 1
NoSolution = iota NoSolution = iota
Going = iota Going = iota
ActionRemoved = iota
ActionAdded = iota
) )
//. "github.com/mudler/luet/pkg/logger" //. "github.com/mudler/luet/pkg/logger"
@ -49,7 +54,7 @@ type QLearningResolver struct {
ToAttempt int ToAttempt int
Attempted map[string]bool Attempted map[string]bool
Correct []string Correct []Choice
Solver PackageSolver Solver PackageSolver
Formula bf.Formula Formula bf.Formula
@ -80,7 +85,7 @@ func (resolver *QLearningResolver) Solve(f bf.Formula, s PackageSolver) (Package
resolver.Attempts = 99 resolver.Attempts = 99
resolver.Attempted = make(map[string]bool, len(resolver.Targets)) resolver.Attempted = make(map[string]bool, len(resolver.Targets))
resolver.Correct = make([]string, len(resolver.Targets), len(resolver.Targets)) resolver.Correct = make([]Choice, len(resolver.Targets), len(resolver.Targets))
resolver.debug = true resolver.debug = true
for resolver.IsComplete() == Going { for resolver.IsComplete() == Going {
// Pick the next move, which is going to be a letter choice. // Pick the next move, which is going to be a letter choice.
@ -97,6 +102,7 @@ func (resolver *QLearningResolver) Solve(f bf.Formula, s PackageSolver) (Package
// env changed. // env changed.
if resolver.Reward(action) > 0.0 { if resolver.Reward(action) > 0.0 {
resolver.Log("%s was correct", action.Action.String()) resolver.Log("%s was correct", action.Action.String())
resolver.ToAttempt = 0 // We won. As we had one sat, let's take it
} else { } else {
resolver.Log("%s was incorrect", action.Action.String()) resolver.Log("%s was incorrect", action.Action.String())
} }
@ -112,7 +118,7 @@ func (resolver *QLearningResolver) Solve(f bf.Formula, s PackageSolver) (Package
TARGET: TARGET:
for _, pack := range resolver.Targets { for _, pack := range resolver.Targets {
for _, w := range resolver.Correct { for _, w := range resolver.Correct {
if pack.String() == w { if pack.String() == w.String() {
fmt.Println("Skipping", pack.String()) fmt.Println("Skipping", pack.String())
continue TARGET continue TARGET
} }
@ -151,36 +157,48 @@ func (resolver *QLearningResolver) IsComplete() int {
return Solved return Solved
} }
// Choose applies a pack attempt, returning func (resolver *QLearningResolver) Try(c Choice) error {
// true if the formula returns sat. pack := c.String()
// resolver.Attempted[pack+strconv.Itoa(int(c.Action))] = true // increase the count
// Choose updates the resolver's state.
func (resolver *QLearningResolver) Choose(pack string) bool {
resolver.Attempted[pack] = true
s, _ := resolver.Solver.(*Solver) s, _ := resolver.Solver.(*Solver)
var filtered []pkg.Package var filtered []pkg.Package
var index int
//Filter by fingerprint switch c.Action {
for i, p := range s.Wanted { case ActionAdded:
for _, p := range resolver.Targets {
if p.String() == pack {
resolver.Solver.(*Solver).Wanted = append(resolver.Solver.(*Solver).Wanted, p)
}
}
case ActionRemoved:
for _, p := range s.Wanted {
if p.String() != pack { if p.String() != pack {
index = i
filtered = append(filtered, p) filtered = append(filtered, p)
} }
} }
resolver.Solver.(*Solver).Wanted = filtered resolver.Solver.(*Solver).Wanted = filtered
//resolver.Current = filtered default:
return errors.New("Nonvalid action")
}
_, err := resolver.Solver.Solve() _, err := resolver.Solver.Solve()
//resolver.Solver.(*Solver).Wanted = resolver.Targets
//resolver.Solver.(*Solver).Wanted = resolver.Targets return err
}
// Choose applies a pack attempt, returning
// true if the formula returns sat.
//
// Choose updates the resolver's state.
func (resolver *QLearningResolver) Choose(c Choice) bool {
err := resolver.Try(c)
if err == nil { if err == nil {
resolver.Correct[index] = pack resolver.Correct = append(resolver.Correct, c)
// resolver.Correct[index] = pack
resolver.ToAttempt-- resolver.ToAttempt--
} else { } else {
resolver.Attempts-- resolver.Attempts--
@ -221,15 +239,17 @@ func (resolver *QLearningResolver) Reward(action *qlearning.StateAction) float32
// Next creates a new slice of qlearning.Action instances. A possible // Next creates a new slice of qlearning.Action instances. A possible
// action is created for each package that could be removed from the formula's target // action is created for each package that could be removed from the formula's target
func (resolver *QLearningResolver) Next() []qlearning.Action { func (resolver *QLearningResolver) Next() []qlearning.Action {
actions := make([]qlearning.Action, 0, len(resolver.Targets)-1) actions := make([]qlearning.Action, 0, (len(resolver.Targets)-1)*2)
fmt.Println("Actions")
for _, pack := range resolver.Targets { for _, pack := range resolver.Targets {
// attempted := resolver.Attempted[pack.String()] // attempted := resolver.Attempted[pack.String()]
// if !attempted { // if !attempted {
actions = append(actions, &Choice{Package: pack.String()}) actions = append(actions, &Choice{Package: pack.String(), Action: ActionRemoved})
actions = append(actions, &Choice{Package: pack.String(), Action: ActionAdded})
fmt.Println(pack.GetName(), " -> Action added: Removed - Added")
// } // }
} }
fmt.Println("ACTIONS", actions)
fmt.Println("_______") fmt.Println("_______")
return actions return actions
} }
@ -246,12 +266,13 @@ func (resolver *QLearningResolver) Log(msg string, args ...interface{}) {
// String returns a consistent hash for the current env state to be // String returns a consistent hash for the current env state to be
// used in a qlearning.Agent. // used in a qlearning.Agent.
func (resolver *QLearningResolver) String() string { func (resolver *QLearningResolver) String() string {
return fmt.Sprintf("%s", resolver.Correct) return fmt.Sprintf("%v", resolver.Correct)
} }
// Choice implements qlearning.Action for a package choice for removal from wanted targets // Choice implements qlearning.Action for a package choice for removal from wanted targets
type Choice struct { type Choice struct {
Package string Package string
Action ActionType
} }
// String returns the character for the current action. // String returns the character for the current action.
@ -262,7 +283,7 @@ func (choice *Choice) String() string {
// Apply updates the state of the solver for the package choice. // Apply updates the state of the solver for the package choice.
func (choice *Choice) Apply(state qlearning.State) qlearning.State { func (choice *Choice) Apply(state qlearning.State) qlearning.State {
resolver := state.(*QLearningResolver) resolver := state.(*QLearningResolver)
resolver.Choose(choice.Package) resolver.Choose(*choice)
return resolver return resolver
} }