mirror of
https://github.com/woodpecker-ci/woodpecker.git
synced 2025-09-19 14:44:20 +00:00
more advanced health check logic
This commit is contained in:
@@ -1,9 +1,11 @@
|
|||||||
# docker build --rm -f Dockerfile.agent -t drone/agent .

# FROM must be the first instruction (only ARG may precede it), so the
# EXPOSE directive lives below the base-image setup.
FROM centurylink/ca-certs
ENV GODEBUG=netdns=go
ADD release/drone-agent /bin/

# Port 3000 is where the agent serves its health-check HTTP endpoints;
# the HEALTHCHECK below probes it through the "ping" sub-command.
EXPOSE 3000
HEALTHCHECK CMD ["/bin/drone-agent", "ping"]

ENTRYPOINT ["/bin/drone-agent"]
|
||||||
|
@@ -56,6 +56,9 @@ func loop(c *cli.Context) error {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
counter.Polling = c.Int("max-procs")
|
||||||
|
counter.Running = 0
|
||||||
|
|
||||||
if c.BoolT("healthcheck") {
|
if c.BoolT("healthcheck") {
|
||||||
go http.ListenAndServe(":3000", nil)
|
go http.ListenAndServe(":3000", nil)
|
||||||
}
|
}
|
||||||
@@ -138,9 +141,22 @@ func run(ctx context.Context, client rpc.Peer, filter rpc.Filter) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
timeout := time.Hour
|
||||||
|
if minutes := work.Timeout; minutes != 0 {
|
||||||
|
timeout = time.Duration(minutes) * time.Minute
|
||||||
|
}
|
||||||
|
|
||||||
|
counter.Add(
|
||||||
|
work.ID,
|
||||||
|
timeout,
|
||||||
|
extractRepositoryName(work.Config), // hack
|
||||||
|
extractBuildNumber(work.Config), // hack
|
||||||
|
)
|
||||||
|
defer counter.Done(work.ID)
|
||||||
|
|
||||||
logger := log.With().
|
logger := log.With().
|
||||||
Str("repo", extractRepositoryName(work.Config)).
|
Str("repo", extractRepositoryName(work.Config)). // hack
|
||||||
Str("build", extractBuildNumber(work.Config)).
|
Str("build", extractBuildNumber(work.Config)). // hack
|
||||||
Str("id", work.ID).
|
Str("id", work.ID).
|
||||||
Logger()
|
Logger()
|
||||||
|
|
||||||
@@ -157,11 +173,6 @@ func run(ctx context.Context, client rpc.Peer, filter rpc.Filter) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
timeout := time.Hour
|
|
||||||
if minutes := work.Timeout; minutes != 0 {
|
|
||||||
timeout = time.Duration(minutes) * time.Minute
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(ctxmeta, timeout)
|
ctx, cancel := context.WithTimeout(ctxmeta, timeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
|
@@ -3,7 +3,10 @@ package main
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/drone/drone/version"
|
"github.com/drone/drone/version"
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
@@ -14,12 +17,17 @@ import (
|
|||||||
// https://github.com/mozilla-services/Dockerflow
|
// https://github.com/mozilla-services/Dockerflow
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
http.HandleFunc("/__heartbeat__", handleHeartbeat)
|
http.HandleFunc("/varz", handleStats)
|
||||||
http.HandleFunc("/__version__", handleVersion)
|
http.HandleFunc("/healthz", handleHeartbeat)
|
||||||
|
http.HandleFunc("/version", handleVersion)
|
||||||
}
|
}
|
||||||
|
|
||||||
func handleHeartbeat(w http.ResponseWriter, r *http.Request) {
|
func handleHeartbeat(w http.ResponseWriter, r *http.Request) {
|
||||||
w.WriteHeader(200)
|
if counter.Healthy() {
|
||||||
|
w.WriteHeader(200)
|
||||||
|
} else {
|
||||||
|
w.WriteHeader(500)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func handleVersion(w http.ResponseWriter, r *http.Request) {
|
func handleVersion(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -31,15 +39,87 @@ func handleVersion(w http.ResponseWriter, r *http.Request) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func handleStats(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if counter.Healthy() {
|
||||||
|
w.WriteHeader(200)
|
||||||
|
} else {
|
||||||
|
w.WriteHeader(500)
|
||||||
|
}
|
||||||
|
w.Header().Add("Content-Type", "text/json")
|
||||||
|
counter.writeTo(w)
|
||||||
|
}
|
||||||
|
|
||||||
// versionResp is the JSON payload describing the running build: a version
// string and a source string.
// NOTE(review): presumably emitted by handleVersion, whose body is not
// visible here - confirm.
type versionResp struct {
	Version string `json:"version"`
	Source  string `json:"source"`
}
|
||||||
|
|
||||||
|
// counter is the default statistics counter used by the agent loop and by
// the HTTP health handlers.
var counter = &state{
	Metadata: map[string]info{},
}

// state holds the agent's bookkeeping: how many workers are polling, how
// many are running a build, and per-build metadata keyed by work ID. The
// embedded mutex guards every field; use the methods rather than touching
// the fields directly from concurrent code.
type state struct {
	sync.Mutex `json:"-"`
	Polling    int             `json:"polling_count"`
	Running    int             `json:"running_count"`
	Metadata   map[string]info `json:"running"`
}

// info describes a single running build.
type info struct {
	ID      string        `json:"id"`
	Repo    string        `json:"repository"`
	Build   string        `json:"build_number"`
	Started time.Time     `json:"build_started"`
	Timeout time.Duration `json:"build_timeout"`
}

// Add records that the build identified by id has started: one worker moves
// from polling to running and the build's metadata is stored with the
// current UTC time as its start time.
func (s *state) Add(id string, timeout time.Duration, repo, build string) {
	s.Lock()
	defer s.Unlock()

	// Allow a zero-value state to be used without a prior map allocation.
	if s.Metadata == nil {
		s.Metadata = map[string]info{}
	}

	s.Polling--
	s.Running++
	s.Metadata[id] = info{
		ID:      id,
		Repo:    repo,
		Build:   build,
		Timeout: timeout,
		Started: time.Now().UTC(),
	}
}

// Done records that the build identified by id has finished: one worker
// moves from running back to polling and the build's metadata is removed.
func (s *state) Done(id string) {
	s.Lock()
	defer s.Unlock()

	s.Polling++
	s.Running--
	delete(s.Metadata, id)
}

// Healthy reports whether every tracked build is still within its deadline,
// i.e. its start time plus its timeout plus a one-hour buffer. It returns
// true when no builds are tracked.
func (s *state) Healthy() bool {
	// Grace period granted on top of each build's own timeout.
	const buffer = time.Hour

	s.Lock()
	defer s.Unlock()

	now := time.Now()
	for _, item := range s.Metadata {
		deadline := item.Started.Add(item.Timeout).Add(buffer)
		if now.After(deadline) {
			return false
		}
	}
	return true
}

// writeTo encodes the state as JSON and writes it to w. It returns the
// number of bytes written and the first encoding or write error.
func (s *state) writeTo(w io.Writer) (int, error) {
	// Only the encoding needs the lock; the write may block on a slow peer
	// and must not stall Add/Done.
	s.Lock()
	out, err := json.Marshal(s)
	s.Unlock()
	if err != nil {
		return 0, err
	}
	return w.Write(out)
}
|
||||||
|
|
||||||
// handles pinging the endpoint and returns an error if the
|
// handles pinging the endpoint and returns an error if the
|
||||||
// agent is in an unhealthy state.
|
// agent is in an unhealthy state.
|
||||||
func pinger(c *cli.Context) error {
|
func pinger(c *cli.Context) error {
|
||||||
resp, err := http.Get("http://localhost:3000/__heartbeat__")
|
resp, err := http.Get("http://localhost:3000/healthz")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
45
cmd/drone-agent/health_test.go
Normal file
45
cmd/drone-agent/health_test.go
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHealthy(t *testing.T) {
|
||||||
|
s := state{}
|
||||||
|
s.Metadata = map[string]info{}
|
||||||
|
|
||||||
|
s.Add("1", time.Hour, "octocat/hello-world", "42")
|
||||||
|
|
||||||
|
if got, want := s.Metadata["1"].ID, "1"; got != want {
|
||||||
|
t.Errorf("got ID %s, want %s", got, want)
|
||||||
|
}
|
||||||
|
if got, want := s.Metadata["1"].Timeout, time.Hour; got != want {
|
||||||
|
t.Errorf("got duration %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
if got, want := s.Metadata["1"].Repo, "octocat/hello-world"; got != want {
|
||||||
|
t.Errorf("got repository name %s, want %s", got, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
s.Metadata["1"] = info{
|
||||||
|
Timeout: time.Hour,
|
||||||
|
Started: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
if s.Healthy() == false {
|
||||||
|
t.Error("want healthy status when timeout not exceeded, got false")
|
||||||
|
}
|
||||||
|
|
||||||
|
s.Metadata["1"] = info{
|
||||||
|
Started: time.Now().UTC().Add(-(time.Minute * 30)),
|
||||||
|
}
|
||||||
|
if s.Healthy() == false {
|
||||||
|
t.Error("want healthy status when timeout+buffer not exceeded, got false")
|
||||||
|
}
|
||||||
|
|
||||||
|
s.Metadata["1"] = info{
|
||||||
|
Started: time.Now().UTC().Add(-(time.Hour + time.Minute)),
|
||||||
|
}
|
||||||
|
if s.Healthy() == true {
|
||||||
|
t.Error("want unhealthy status when timeout+buffer not exceeded, got true")
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user