Fix the semantic meaning of etcd server within component statuses and metrics.

Instead of enumerating every etcd endpoint known to the apiserver, we
group them by purpose: `etcd-0` is the default etcd cluster, `etcd-1` is
the first resource override, `etcd-2` is the second override, and
so on.
This commit is contained in:
Marek Siarkowicz 2023-07-19 14:25:54 +02:00
parent fa88c0b779
commit 03aad1f823
7 changed files with 36 additions and 42 deletions

View File

@ -117,5 +117,5 @@ func (server *EtcdServer) DoServerCheck() (probe.Result, string, error) {
if err != nil {
return probe.Failure, "", err
}
return probe.Success, "", err
return probe.Success, "ok", err
}

View File

@ -291,28 +291,17 @@ func Configs(storageConfig storagebackend.Config) []storagebackend.Config {
// Returns all storage configurations including those for group resource overrides
func configs(storageConfig storagebackend.Config, grOverrides map[schema.GroupResource]groupResourceOverrides) []storagebackend.Config {
locations := sets.NewString()
configs := []storagebackend.Config{}
for _, loc := range storageConfig.Transport.ServerList {
// copy
newConfig := storageConfig
newConfig.Transport.ServerList = []string{loc}
configs = append(configs, newConfig)
locations.Insert(loc)
}
configs := []storagebackend.Config{storageConfig}
for _, override := range grOverrides {
for _, loc := range override.etcdLocation {
if locations.Has(loc) {
continue
}
// copy
newConfig := storageConfig
override.Apply(&newConfig, &StorageCodecConfig{})
newConfig.Transport.ServerList = []string{loc}
configs = append(configs, newConfig)
locations.Insert(loc)
if len(override.etcdLocation) == 0 {
continue
}
// copy
newConfig := storageConfig
override.Apply(&newConfig, &StorageCodecConfig{})
newConfig.Transport.ServerList = override.etcdLocation
configs = append(configs, newConfig)
}
return configs
}

View File

@ -191,33 +191,36 @@ func TestConfigs(t *testing.T) {
defaultEtcdLocations := []string{"http://127.0.0.1", "http://127.0.0.2"}
testCases := []struct {
resource schema.GroupResource
resource *schema.GroupResource
servers []string
wantConfigs []storagebackend.Config
}{
{
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
},
},
{
resource: schema.GroupResource{Group: example.GroupName, Resource: "resource"},
resource: &schema.GroupResource{Group: example.GroupName, Resource: "resource"},
servers: []string{},
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
},
},
{
resource: &schema.GroupResource{Group: example.GroupName, Resource: "resource"},
servers: []string{"http://127.0.0.1:10000"},
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1:10000"}}, Prefix: "/registry", Paging: true},
},
},
{
resource: schema.GroupResource{Group: example.GroupName, Resource: "resource"},
resource: &schema.GroupResource{Group: example.GroupName, Resource: "resource"},
servers: []string{"http://127.0.0.1:10000", "https://127.0.0.1", "http://127.0.0.2"},
wantConfigs: []storagebackend.Config{
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1:10000"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"https://127.0.0.1"}}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: defaultEtcdLocations}, Prefix: "/registry", Paging: true},
{Transport: storagebackend.TransportConfig{ServerList: []string{"http://127.0.0.1:10000", "https://127.0.0.1", "http://127.0.0.2"}}, Prefix: "/registry", Paging: true},
},
},
}
@ -230,8 +233,8 @@ func TestConfigs(t *testing.T) {
},
}
storageFactory := NewDefaultStorageFactory(defaultConfig, "", codecs, NewDefaultResourceEncodingConfig(scheme), NewResourceConfig(), nil)
if len(test.servers) > 0 {
storageFactory.SetEtcdLocation(test.resource, test.servers)
if test.resource != nil {
storageFactory.SetEtcdLocation(*test.resource, test.servers)
}
got := storageFactory.Configs()

View File

@ -84,7 +84,7 @@ var (
},
[]string{"endpoint"},
)
storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"server"}, nil, compbasemetrics.ALPHA, "")
storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"cluster"}, nil, compbasemetrics.ALPHA, "")
storageMonitor = &monitorCollector{}
etcdEventsReceivedCounts = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
@ -274,21 +274,21 @@ func (c *monitorCollector) CollectWithStability(ch chan<- compbasemetrics.Metric
}
for i, m := range monitors {
server := fmt.Sprintf("etcd-%d", i)
cluster := fmt.Sprintf("etcd-%d", i)
klog.V(4).InfoS("Start collecting storage metrics", "server", server)
klog.V(4).InfoS("Start collecting storage metrics", "cluster", cluster)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
metrics, err := m.Monitor(ctx)
cancel()
m.Close()
if err != nil {
klog.InfoS("Failed to get storage metrics", "server", server, "err", err)
klog.InfoS("Failed to get storage metrics", "cluster", cluster, "err", err)
continue
}
metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), server)
metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), cluster)
if err != nil {
klog.ErrorS(err, "Failed to create metric", "server", server)
klog.ErrorS(err, "Failed to create metric", "cluster", cluster)
}
ch <- metric
}

View File

@ -3760,7 +3760,7 @@
type: Custom
stabilityLevel: ALPHA
labels:
- server
- cluster
- name: transformation_duration_seconds
subsystem: storage
namespace: apiserver

View File

@ -932,7 +932,7 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
<td class="metric_type" data-type="custom">Custom</td>
<td class="metric_description">Size of the storage database file physically allocated in bytes.</td>
<td class="metric_labels_varying"><div class="metric_label">server</div></td>
<td class="metric_labels_varying"><div class="metric_label">cluster</div></td>
<td class="metric_labels_constant"></td>
<td class="metric_deprecated_version"></td></tr>
<tr class="metric"><td class="metric_name">apiserver_storage_transformation_duration_seconds</td>

View File

@ -77,7 +77,9 @@ func TestAPIServerProcessMetrics(t *testing.T) {
}
func TestAPIServerStorageMetrics(t *testing.T) {
s := kubeapiservertesting.StartTestServerOrDie(t, nil, nil, framework.SharedEtcd())
config := framework.SharedEtcd()
config.Transport.ServerList = []string{config.Transport.ServerList[0], config.Transport.ServerList[0]}
s := kubeapiservertesting.StartTestServerOrDie(t, nil, nil, config)
defer s.TearDownFn()
metrics, err := scrapeMetrics(s)