mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 09:49:50 +00:00
PLEG should report events if a container is removed
Currently, pleg would report an event if a container transitions from running to exited between relisting. However, it would not report any event if a container gets stopped and removed between relisting. This event will eventually be handled when the pod syncs periodically, but this is undesirable. This change ensures that we detect all such events.
This commit is contained in:
parent
930d796120
commit
73a4f8225c
@ -442,6 +442,15 @@ func (p *Pod) FindContainerByName(containerName string) *Container {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *Pod) FindContainerByID(id ContainerID) *Container {
|
||||||
|
for _, c := range p.Containers {
|
||||||
|
if c.ID == id {
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// ToAPIPod converts Pod to api.Pod. Note that if a field in api.Pod has no
|
// ToAPIPod converts Pod to api.Pod. Note that if a field in api.Pod has no
|
||||||
// corresponding field in Pod, the field would not be populated.
|
// corresponding field in Pod, the field would not be populated.
|
||||||
func (p *Pod) ToAPIPod() *api.Pod {
|
func (p *Pod) ToAPIPod() *api.Pod {
|
||||||
|
@ -49,16 +49,43 @@ type GenericPLEG struct {
|
|||||||
runtime kubecontainer.Runtime
|
runtime kubecontainer.Runtime
|
||||||
// The channel from which the subscriber listens events.
|
// The channel from which the subscriber listens events.
|
||||||
eventChannel chan *PodLifecycleEvent
|
eventChannel chan *PodLifecycleEvent
|
||||||
// The internal cache for container information.
|
// The internal cache for pod/container information.
|
||||||
containers map[string]containerInfo
|
podRecords podRecords
|
||||||
// Time of the last relisting.
|
// Time of the last relisting.
|
||||||
lastRelistTime time.Time
|
lastRelistTime time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
type containerInfo struct {
|
// plegContainerState has an one-to-one mapping to the
|
||||||
podID types.UID
|
// kubecontainer.ContainerState except for the Non-existent state. This state
|
||||||
state kubecontainer.ContainerState
|
// is introduced here to complete the state transition scenarios.
|
||||||
|
type plegContainerState string
|
||||||
|
|
||||||
|
const (
|
||||||
|
plegContainerRunning plegContainerState = "running"
|
||||||
|
plegContainerExited plegContainerState = "exited"
|
||||||
|
plegContainerUnknown plegContainerState = "unknown"
|
||||||
|
plegContainerNonExistent plegContainerState = "non-existent"
|
||||||
|
)
|
||||||
|
|
||||||
|
func convertState(state kubecontainer.ContainerState) plegContainerState {
|
||||||
|
switch state {
|
||||||
|
case kubecontainer.ContainerStateRunning:
|
||||||
|
return plegContainerRunning
|
||||||
|
case kubecontainer.ContainerStateExited:
|
||||||
|
return plegContainerExited
|
||||||
|
case kubecontainer.ContainerStateUnknown:
|
||||||
|
return plegContainerUnknown
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("unrecognized container state: %v", state))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type podRecord struct {
|
||||||
|
old *kubecontainer.Pod
|
||||||
|
current *kubecontainer.Pod
|
||||||
|
}
|
||||||
|
|
||||||
|
type podRecords map[types.UID]*podRecord
|
||||||
|
|
||||||
func NewGenericPLEG(runtime kubecontainer.Runtime, channelCapacity int,
|
func NewGenericPLEG(runtime kubecontainer.Runtime, channelCapacity int,
|
||||||
relistPeriod time.Duration) PodLifecycleEventGenerator {
|
relistPeriod time.Duration) PodLifecycleEventGenerator {
|
||||||
@ -66,7 +93,7 @@ func NewGenericPLEG(runtime kubecontainer.Runtime, channelCapacity int,
|
|||||||
relistPeriod: relistPeriod,
|
relistPeriod: relistPeriod,
|
||||||
runtime: runtime,
|
runtime: runtime,
|
||||||
eventChannel: make(chan *PodLifecycleEvent, channelCapacity),
|
eventChannel: make(chan *PodLifecycleEvent, channelCapacity),
|
||||||
containers: make(map[string]containerInfo),
|
podRecords: make(podRecords),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,18 +109,30 @@ func (g *GenericPLEG) Start() {
|
|||||||
go util.Until(g.relist, g.relistPeriod, util.NeverStop)
|
go util.Until(g.relist, g.relistPeriod, util.NeverStop)
|
||||||
}
|
}
|
||||||
|
|
||||||
func generateEvent(podID types.UID, cid string, oldState, newState kubecontainer.ContainerState) *PodLifecycleEvent {
|
func generateEvent(podID types.UID, cid string, oldState, newState plegContainerState) *PodLifecycleEvent {
|
||||||
|
glog.V(7).Infof("GenericPLEG: %v/%v: %v -> %v", podID, cid, oldState, newState)
|
||||||
if newState == oldState {
|
if newState == oldState {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
switch newState {
|
switch newState {
|
||||||
case kubecontainer.ContainerStateRunning:
|
case plegContainerRunning:
|
||||||
return &PodLifecycleEvent{ID: podID, Type: ContainerStarted, Data: cid}
|
return &PodLifecycleEvent{ID: podID, Type: ContainerStarted, Data: cid}
|
||||||
case kubecontainer.ContainerStateExited:
|
case plegContainerExited:
|
||||||
return &PodLifecycleEvent{ID: podID, Type: ContainerDied, Data: cid}
|
return &PodLifecycleEvent{ID: podID, Type: ContainerDied, Data: cid}
|
||||||
case kubecontainer.ContainerStateUnknown:
|
case plegContainerUnknown:
|
||||||
// Don't generate any event if the status is unknown.
|
// Don't generate any event if the status is unknown.
|
||||||
return nil
|
return nil
|
||||||
|
case plegContainerNonExistent:
|
||||||
|
// We report "ContainerDied" when container was stopped OR removed. We
|
||||||
|
// may want to distinguish the two cases in the future.
|
||||||
|
switch oldState {
|
||||||
|
case plegContainerExited:
|
||||||
|
// We already reported that the container died before. There is no
|
||||||
|
// need to do it again.
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return &PodLifecycleEvent{ID: podID, Type: ContainerDied, Data: cid}
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("unrecognized container state: %v", newState))
|
panic(fmt.Sprintf("unrecognized container state: %v", newState))
|
||||||
}
|
}
|
||||||
@ -116,40 +155,124 @@ func (g *GenericPLEG) relist() {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// Get all the pods.
|
// Get all the pods.
|
||||||
pods, err := g.runtime.GetPods(true)
|
podList, err := g.runtime.GetPods(true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("GenericPLEG: Unable to retrieve pods: %v", err)
|
glog.Errorf("GenericPLEG: Unable to retrieve pods: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
pods := kubecontainer.Pods(podList)
|
||||||
|
|
||||||
events := []*PodLifecycleEvent{}
|
eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
|
||||||
containers := make(map[string]containerInfo, len(g.containers))
|
// Process all currently visible pods.
|
||||||
// Create a new containers map, compares container statuses, and generates
|
for _, pod := range pods {
|
||||||
// correspoinding events.
|
g.podRecords.setCurrent(pod)
|
||||||
for _, p := range pods {
|
// Locate the old pod.
|
||||||
for _, c := range p.Containers {
|
oldPod := g.podRecords.getOld(pod.ID)
|
||||||
cid := c.ID.ID
|
|
||||||
// Get the of existing container info. Defaults to state unknown.
|
// Process all currently visible containers in the pod.
|
||||||
oldState := kubecontainer.ContainerStateUnknown
|
for _, container := range pod.Containers {
|
||||||
if info, ok := g.containers[cid]; ok {
|
cid := container.ID
|
||||||
oldState = info.state
|
oldState := getContainerState(oldPod, cid)
|
||||||
|
newState := convertState(container.State)
|
||||||
|
e := generateEvent(pod.ID, cid.ID, oldState, newState)
|
||||||
|
updateEvents(eventsByPodID, e)
|
||||||
}
|
}
|
||||||
// Generate an event if required.
|
|
||||||
glog.V(7).Infof("GenericPLEG: %v/%v: %v -> %v", p.ID, cid, oldState, c.State)
|
if oldPod == nil {
|
||||||
if e := generateEvent(p.ID, cid, oldState, c.State); e != nil {
|
continue
|
||||||
events = append(events, e)
|
|
||||||
}
|
}
|
||||||
// Write to the new cache.
|
// Process all containers in the old pod, but no longer in the new pod.
|
||||||
containers[cid] = containerInfo{podID: p.ID, state: c.State}
|
for _, oldContainer := range oldPod.Containers {
|
||||||
|
cid := oldContainer.ID
|
||||||
|
oldState := convertState(oldContainer.State)
|
||||||
|
newState := getContainerState(pod, cid)
|
||||||
|
if newState != plegContainerNonExistent {
|
||||||
|
// We already processed the container.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Container no longer visible, generate an event.
|
||||||
|
e := generateEvent(pod.ID, cid.ID, oldState, plegContainerNonExistent)
|
||||||
|
updateEvents(eventsByPodID, e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Swap the container info cache. This is purely to avoid the need of
|
// Process all pods that are no longer visible.
|
||||||
// garbage collection.
|
for pid := range g.podRecords {
|
||||||
g.containers = containers
|
if pod := g.podRecords.getCurrent(pid); pod != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
oldPod := g.podRecords.getOld(pid)
|
||||||
|
for _, oldContainer := range oldPod.Containers {
|
||||||
|
cid := oldContainer.ID
|
||||||
|
oldState := convertState(oldContainer.State)
|
||||||
|
e := generateEvent(oldPod.ID, cid.ID, oldState, plegContainerNonExistent)
|
||||||
|
updateEvents(eventsByPodID, e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the internal storage.
|
||||||
|
g.podRecords.updateAll()
|
||||||
|
|
||||||
// Send out the events.
|
// Send out the events.
|
||||||
|
for _, events := range eventsByPodID {
|
||||||
for i := range events {
|
for i := range events {
|
||||||
g.eventChannel <- events[i]
|
g.eventChannel <- events[i]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func updateEvents(eventsByPodID map[types.UID][]*PodLifecycleEvent, e *PodLifecycleEvent) {
|
||||||
|
if e == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
eventsByPodID[e.ID] = append(eventsByPodID[e.ID], e)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getContainerState(pod *kubecontainer.Pod, cid kubecontainer.ContainerID) plegContainerState {
|
||||||
|
// Default to the non-existent state.
|
||||||
|
state := plegContainerNonExistent
|
||||||
|
if pod == nil {
|
||||||
|
return state
|
||||||
|
}
|
||||||
|
container := pod.FindContainerByID(cid)
|
||||||
|
if container == nil {
|
||||||
|
return state
|
||||||
|
}
|
||||||
|
return convertState(container.State)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pr podRecords) getOld(id types.UID) *kubecontainer.Pod {
|
||||||
|
r, ok := pr[id]
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return r.old
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pr podRecords) getCurrent(id types.UID) *kubecontainer.Pod {
|
||||||
|
r, ok := pr[id]
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return r.current
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pr podRecords) setCurrent(pod *kubecontainer.Pod) {
|
||||||
|
if r, ok := pr[pod.ID]; ok {
|
||||||
|
r.current = pod
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pr[pod.ID] = &podRecord{current: pod}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pr podRecords) updateAll() {
|
||||||
|
for k, r := range pr {
|
||||||
|
if r.current == nil {
|
||||||
|
// Pod no longer exists; delete the entry.
|
||||||
|
delete(pr, k)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
r.old = r.current
|
||||||
|
r.current = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -43,7 +43,7 @@ func newTestGenericPLEG() *TestGenericPLEG {
|
|||||||
relistPeriod: time.Hour,
|
relistPeriod: time.Hour,
|
||||||
runtime: fakeRuntime,
|
runtime: fakeRuntime,
|
||||||
eventChannel: make(chan *PodLifecycleEvent, 100),
|
eventChannel: make(chan *PodLifecycleEvent, 100),
|
||||||
containers: make(map[string]containerInfo),
|
podRecords: make(podRecords),
|
||||||
}
|
}
|
||||||
return &TestGenericPLEG{pleg: pleg, runtime: fakeRuntime}
|
return &TestGenericPLEG{pleg: pleg, runtime: fakeRuntime}
|
||||||
}
|
}
|
||||||
@ -79,7 +79,7 @@ func verifyEvents(t *testing.T, expected, actual []*PodLifecycleEvent) {
|
|||||||
sort.Sort(sortableEvents(expected))
|
sort.Sort(sortableEvents(expected))
|
||||||
sort.Sort(sortableEvents(actual))
|
sort.Sort(sortableEvents(actual))
|
||||||
if !reflect.DeepEqual(expected, actual) {
|
if !reflect.DeepEqual(expected, actual) {
|
||||||
t.Errorf("Actual events differ from the expected; diff: %v", util.ObjectDiff(expected, actual))
|
t.Errorf("Actual events differ from the expected; diff:\n %v", util.ObjectDiff(expected, actual))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,7 +87,6 @@ func TestRelisting(t *testing.T) {
|
|||||||
testPleg := newTestGenericPLEG()
|
testPleg := newTestGenericPLEG()
|
||||||
pleg, runtime := testPleg.pleg, testPleg.runtime
|
pleg, runtime := testPleg.pleg, testPleg.runtime
|
||||||
ch := pleg.Watch()
|
ch := pleg.Watch()
|
||||||
|
|
||||||
// The first relist should send a PodSync event to each pod.
|
// The first relist should send a PodSync event to each pod.
|
||||||
runtime.AllPodList = []*kubecontainer.Pod{
|
runtime.AllPodList = []*kubecontainer.Pod{
|
||||||
{
|
{
|
||||||
@ -146,3 +145,67 @@ func TestRelisting(t *testing.T) {
|
|||||||
actual = getEventsFromChannel(ch)
|
actual = getEventsFromChannel(ch)
|
||||||
verifyEvents(t, expected, actual)
|
verifyEvents(t, expected, actual)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReportMissingContainers(t *testing.T) {
|
||||||
|
testPleg := newTestGenericPLEG()
|
||||||
|
pleg, runtime := testPleg.pleg, testPleg.runtime
|
||||||
|
ch := pleg.Watch()
|
||||||
|
runtime.AllPodList = []*kubecontainer.Pod{
|
||||||
|
{
|
||||||
|
ID: "1234",
|
||||||
|
Containers: []*kubecontainer.Container{
|
||||||
|
createTestContainer("c1", kubecontainer.ContainerStateRunning),
|
||||||
|
createTestContainer("c2", kubecontainer.ContainerStateRunning),
|
||||||
|
createTestContainer("c3", kubecontainer.ContainerStateExited),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// Drain the events from the channel
|
||||||
|
pleg.relist()
|
||||||
|
getEventsFromChannel(ch)
|
||||||
|
|
||||||
|
// Container c2 was stopped and removed between relists. We should report
|
||||||
|
// the event. The exited container c3 was garbage collected (i.e., removed)
|
||||||
|
// between relists. We should ignore that event.
|
||||||
|
runtime.AllPodList = []*kubecontainer.Pod{
|
||||||
|
{
|
||||||
|
ID: "1234",
|
||||||
|
Containers: []*kubecontainer.Container{
|
||||||
|
createTestContainer("c1", kubecontainer.ContainerStateRunning),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
pleg.relist()
|
||||||
|
expected := []*PodLifecycleEvent{
|
||||||
|
{ID: "1234", Type: ContainerDied, Data: "c2"},
|
||||||
|
}
|
||||||
|
actual := getEventsFromChannel(ch)
|
||||||
|
verifyEvents(t, expected, actual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReportMissingPods(t *testing.T) {
|
||||||
|
testPleg := newTestGenericPLEG()
|
||||||
|
pleg, runtime := testPleg.pleg, testPleg.runtime
|
||||||
|
ch := pleg.Watch()
|
||||||
|
runtime.AllPodList = []*kubecontainer.Pod{
|
||||||
|
{
|
||||||
|
ID: "1234",
|
||||||
|
Containers: []*kubecontainer.Container{
|
||||||
|
createTestContainer("c2", kubecontainer.ContainerStateRunning),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// Drain the events from the channel
|
||||||
|
pleg.relist()
|
||||||
|
getEventsFromChannel(ch)
|
||||||
|
|
||||||
|
// Container c2 was stopped and removed between relists. We should report
|
||||||
|
// the event.
|
||||||
|
runtime.AllPodList = []*kubecontainer.Pod{}
|
||||||
|
pleg.relist()
|
||||||
|
expected := []*PodLifecycleEvent{
|
||||||
|
{ID: "1234", Type: ContainerDied, Data: "c2"},
|
||||||
|
}
|
||||||
|
actual := getEventsFromChannel(ch)
|
||||||
|
verifyEvents(t, expected, actual)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user