1
0
mirror of https://github.com/rancher/rke.git synced 2025-09-15 22:49:13 +00:00

Return proper error in etcd healthcheck

This commit is contained in:
Alena Prokharchyk
2019-10-30 15:11:26 -07:00
parent 1bee3e452f
commit fa9410faeb
2 changed files with 40 additions and 24 deletions

View File

@@ -19,7 +19,7 @@ import (
func getEtcdClient(ctx context.Context, etcdHost *hosts.Host, localConnDialerFactory hosts.DialerFactory, cert, key []byte) (etcdclient.Client, error) {
dialer, err := getEtcdDialer(localConnDialerFactory, etcdHost)
if err != nil {
return nil, fmt.Errorf("Failed to create a dialer for host [%s]: %v", etcdHost.Address, err)
return nil, fmt.Errorf("failed to create a dialer for host [%s]: %v", etcdHost.Address, err)
}
tlsConfig, err := getEtcdTLSConfig(cert, key)
if err != nil {
@@ -40,17 +40,18 @@ func getEtcdClient(ctx context.Context, etcdHost *hosts.Host, localConnDialerFac
return etcdclient.New(cfg)
}
func isEtcdHealthy(ctx context.Context, localConnDialerFactory hosts.DialerFactory, host *hosts.Host, cert, key []byte, url string) bool {
logrus.Debugf("[etcd] Check etcd cluster health")
func isEtcdHealthy(localConnDialerFactory hosts.DialerFactory, host *hosts.Host, cert, key []byte, url string) error {
logrus.Debugf("[etcd] check etcd cluster health on host [%s]", host.Address)
var finalErr error
var healthy string
for i := 0; i < 3; i++ {
dialer, err := getEtcdDialer(localConnDialerFactory, host)
if err != nil {
return false
return err
}
tlsConfig, err := getEtcdTLSConfig(cert, key)
if err != nil {
logrus.Debugf("[etcd] Failed to create etcd tls config for host [%s]: %v", host.Address, err)
return false
return fmt.Errorf("[etcd] failed to create etcd tls config for host [%s]: %v", host.Address, err)
}
hc := http.Client{
@@ -60,33 +61,38 @@ func isEtcdHealthy(ctx context.Context, localConnDialerFactory hosts.DialerFacto
TLSHandshakeTimeout: 10 * time.Second,
},
}
healthy, err := getHealthEtcd(hc, host, url)
if err != nil {
logrus.Debug(err)
healthy, finalErr = getHealthEtcd(hc, host, url)
if finalErr != nil {
logrus.Debugf("[etcd] failed to check health for etcd host [%s]: %v", host.Address, finalErr)
time.Sleep(5 * time.Second)
continue
}
// log in debug here as we don't want to log in warn on every iteration
// the error will be logged in the caller stack
logrus.Debugf("[etcd] etcd host [%s] reported healthy=%s", host.Address, healthy)
if healthy == "true" {
logrus.Debugf("[etcd] etcd cluster is healthy")
return true
return nil
}
}
return false
if finalErr != nil {
return fmt.Errorf("[etcd] host [%s] failed to check etcd health: %v", host.Address, finalErr)
}
return fmt.Errorf("[etcd] host [%s] reported healthy=%s", host.Address, healthy)
}
func getHealthEtcd(hc http.Client, host *hosts.Host, url string) (string, error) {
healthy := struct{ Health string }{}
resp, err := hc.Get(url)
if err != nil {
return healthy.Health, fmt.Errorf("Failed to get /health for host [%s]: %v", host.Address, err)
return healthy.Health, fmt.Errorf("failed to get /health for host [%s]: %v", host.Address, err)
}
bytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
return healthy.Health, fmt.Errorf("Failed to read response of /health for host [%s]: %v", host.Address, err)
return healthy.Health, fmt.Errorf("failed to read response of /health for host [%s]: %v", host.Address, err)
}
resp.Body.Close()
if err := json.Unmarshal(bytes, &healthy); err != nil {
return healthy.Health, fmt.Errorf("Failed to unmarshal response of /health for host [%s]: %v", host.Address, err)
return healthy.Health, fmt.Errorf("failed to unmarshal response of /health for host [%s]: %v", host.Address, err)
}
return healthy.Health, nil
}