Merge pull request #10175 from ChengyuZhu6/error_message

runtime: Add specific error message for gRPC request timeouts
This commit is contained in:
Fabiano Fidêncio 2024-08-16 22:06:49 +02:00 committed by GitHub
commit 28aa4314ba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 116 additions and 5 deletions

View File

@ -42,6 +42,7 @@ import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
grpcStatus "google.golang.org/grpc/status"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/proto"
@ -575,6 +576,9 @@ func (k *kataAgent) exec(ctx context.Context, sandbox *Sandbox, c Container, cmd
}
if _, err := k.sendReq(ctx, req); err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "ExecProcessRequest timed out")
}
return nil, err
}
@ -592,6 +596,9 @@ func (k *kataAgent) updateInterface(ctx context.Context, ifc *pbTypes.Interface)
"interface-requested": fmt.Sprintf("%+v", ifc),
"resulting-interface": fmt.Sprintf("%+v", resultingInterface),
}).WithError(err).Error("update interface request failed")
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "UpdateInterfaceRequest timed out")
}
}
if resultInterface, ok := resultingInterface.(*pbTypes.Interface); ok {
return resultInterface, err
@ -621,6 +628,9 @@ func (k *kataAgent) updateRoutes(ctx context.Context, routes []*pbTypes.Route) (
"routes-requested": fmt.Sprintf("%+v", routes),
"resulting-routes": fmt.Sprintf("%+v", resultingRoutes),
}).WithError(err).Error("update routes request failed")
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "UpdateRoutesRequest timed out")
}
}
resultRoutes, ok := resultingRoutes.(*grpc.Routes)
if ok && resultRoutes != nil {
@ -639,6 +649,9 @@ func (k *kataAgent) updateEphemeralMounts(ctx context.Context, storages []*grpc.
if _, err := k.sendReq(ctx, storagesReq); err != nil {
k.Logger().WithError(err).Error("update mounts request failed")
if err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "UpdateEphemeralMountsRequest timed out")
}
return err
}
return nil
@ -661,6 +674,9 @@ func (k *kataAgent) addARPNeighbors(ctx context.Context, neighs []*pbTypes.ARPNe
}).Warn("add ARP neighbors request failed due to old agent, please upgrade Kata Containers image version")
return nil
}
if err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "AddARPNeighborsRequest timed out")
}
k.Logger().WithFields(logrus.Fields{
"arpneighbors-requested": fmt.Sprintf("%+v", neighs),
}).WithError(err).Error("add ARP neighbors request failed")
@ -674,6 +690,9 @@ func (k *kataAgent) listInterfaces(ctx context.Context) ([]*pbTypes.Interface, e
req := &grpc.ListInterfacesRequest{}
resultingInterfaces, err := k.sendReq(ctx, req)
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "ListInterfacesRequest timed out")
}
return nil, err
}
resultInterfaces, ok := resultingInterfaces.(*grpc.Interfaces)
@ -687,6 +706,9 @@ func (k *kataAgent) listRoutes(ctx context.Context) ([]*pbTypes.Route, error) {
req := &grpc.ListRoutesRequest{}
resultingRoutes, err := k.sendReq(ctx, req)
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "ListRoutesRequest timed out")
}
return nil, err
}
resultRoutes, ok := resultingRoutes.(*grpc.Routes)
@ -813,6 +835,9 @@ func (k *kataAgent) startSandbox(ctx context.Context, sandbox *Sandbox) error {
_, err = k.sendReq(ctx, req)
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "CreateSandboxRequest timed out")
}
return err
}
@ -917,6 +942,9 @@ func (k *kataAgent) stopSandbox(ctx context.Context, sandbox *Sandbox) error {
req := &grpc.DestroySandboxRequest{}
if _, err := k.sendReq(ctx, req); err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "DestroySandboxRequest timed out")
}
return err
}
@ -1391,6 +1419,9 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
}
if _, err = k.sendReq(ctx, req); err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "CreateContainerRequest timed out")
}
return nil, err
}
return buildProcessFromExecID(req.ExecId)
@ -1850,6 +1881,9 @@ func (k *kataAgent) startContainer(ctx context.Context, sandbox *Sandbox, c *Con
}
_, err := k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "StartContainerRequest timed out")
}
return err
}
@ -1858,6 +1892,9 @@ func (k *kataAgent) stopContainer(ctx context.Context, sandbox *Sandbox, c Conta
defer span.End()
_, err := k.sendReq(ctx, &grpc.RemoveContainerRequest{ContainerId: c.id})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "RemoveContainerRequest timed out")
}
return err
}
@ -1874,6 +1911,9 @@ func (k *kataAgent) signalProcess(ctx context.Context, c *Container, processID s
}
_, err := k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "SignalProcessRequest timed out")
}
return err
}
@ -1886,6 +1926,9 @@ func (k *kataAgent) winsizeProcess(ctx context.Context, c *Container, processID
}
_, err := k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "TtyWinResizeRequest timed out")
}
return err
}
@ -1901,6 +1944,9 @@ func (k *kataAgent) updateContainer(ctx context.Context, sandbox *Sandbox, c Con
}
_, err = k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "UpdateContainerRequest timed out")
}
return err
}
@ -1910,6 +1956,9 @@ func (k *kataAgent) pauseContainer(ctx context.Context, sandbox *Sandbox, c Cont
}
_, err := k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "PauseContainerRequest timed out")
}
return err
}
@ -1919,6 +1968,9 @@ func (k *kataAgent) resumeContainer(ctx context.Context, sandbox *Sandbox, c Con
}
_, err := k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "ResumeContainerRequest timed out")
}
return err
}
@ -1943,6 +1995,9 @@ func (k *kataAgent) memHotplugByProbe(ctx context.Context, addr uint64, sizeMB u
}
_, err := k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "MemHotplugByProbeRequest timed out")
}
return err
}
@ -1954,6 +2009,9 @@ func (k *kataAgent) onlineCPUMem(ctx context.Context, cpus uint32, cpuOnly bool)
}
_, err := k.sendReq(ctx, req)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "OnlineCPUMemRequest timed out")
}
return err
}
@ -1965,6 +2023,9 @@ func (k *kataAgent) statsContainer(ctx context.Context, sandbox *Sandbox, c Cont
returnStats, err := k.sendReq(ctx, req)
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "StatsContainerRequest timed out")
}
return nil, err
}
@ -2046,6 +2107,9 @@ func (k *kataAgent) disconnect(ctx context.Context) error {
func (k *kataAgent) check(ctx context.Context) error {
_, err := k.sendReq(ctx, &grpc.CheckRequest{})
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "CheckRequest timed out")
}
err = fmt.Errorf("Failed to Check if grpc server is working: %s", err)
}
return err
@ -2060,6 +2124,9 @@ func (k *kataAgent) waitProcess(ctx context.Context, c *Container, processID str
ExecId: processID,
})
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return 0, status.Errorf(codes.DeadlineExceeded, "WaitProcessRequest timed out")
}
return 0, err
}
@ -2074,6 +2141,9 @@ func (k *kataAgent) writeProcessStdin(ctx context.Context, c *Container, Process
})
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return 0, status.Errorf(codes.DeadlineExceeded, "WriteStreamRequest timed out")
}
return 0, err
}
@ -2085,7 +2155,9 @@ func (k *kataAgent) closeProcessStdin(ctx context.Context, c *Container, Process
ContainerId: c.id,
ExecId: ProcessID,
})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "CloseStdinRequest timed out")
}
return err
}
@ -2093,12 +2165,17 @@ func (k *kataAgent) reseedRNG(ctx context.Context, data []byte) error {
_, err := k.sendReq(ctx, &grpc.ReseedRandomDevRequest{
Data: data,
})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "ReseedRandomDevRequest timed out")
}
return err
}
func (k *kataAgent) removeStaleVirtiofsShareMounts(ctx context.Context) error {
_, err := k.sendReq(ctx, &grpc.RemoveStaleVirtiofsShareMountsRequest{})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "removeStaleVirtiofsShareMounts timed out")
}
return err
}
@ -2320,13 +2397,15 @@ func (k *kataAgent) readProcessStream(containerID, processID string, data []byte
copy(data, resp.Data)
return len(resp.Data), nil
}
return 0, err
}
func (k *kataAgent) getGuestDetails(ctx context.Context, req *grpc.GuestDetailsRequest) (*grpc.GuestDetailsResponse, error) {
resp, err := k.sendReq(ctx, req)
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "GuestDetailsRequest request timed out")
}
return nil, err
}
@ -2338,7 +2417,9 @@ func (k *kataAgent) setGuestDateTime(ctx context.Context, tv time.Time) error {
Sec: tv.Unix(),
Usec: int64(tv.Nanosecond() / 1e3),
})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "SetGuestDateTimeRequest request timed out")
}
return err
}
@ -2391,6 +2472,9 @@ func (k *kataAgent) copyFile(ctx context.Context, src, dst string) error {
// Handle the special case where the file is empty
if cpReq.FileSize == 0 {
_, err := k.sendReq(ctx, cpReq)
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "CopyFileRequest timed out")
}
return err
}
@ -2407,6 +2491,9 @@ func (k *kataAgent) copyFile(ctx context.Context, src, dst string) error {
cpReq.Offset = offset
if _, err = k.sendReq(ctx, cpReq); err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "CopyFileRequest timed out")
}
return fmt.Errorf("Could not send CopyFile request: %v", err)
}
@ -2423,6 +2510,9 @@ func (k *kataAgent) addSwap(ctx context.Context, PCIPath types.PciPath) error {
defer span.End()
_, err := k.sendReq(ctx, &grpc.AddSwapRequest{PCIPath: PCIPath.ToArray()})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "AddSwapRequest timed out")
}
return err
}
@ -2449,6 +2539,9 @@ func (k *kataAgent) getOOMEvent(ctx context.Context) (string, error) {
req := &grpc.GetOOMEventRequest{}
result, err := k.sendReq(ctx, req)
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return "", status.Errorf(codes.DeadlineExceeded, "GetOOMEventRequest timed out")
}
return "", err
}
if oomEvent, ok := result.(*grpc.OOMEvent); ok {
@ -2460,6 +2553,9 @@ func (k *kataAgent) getOOMEvent(ctx context.Context) (string, error) {
func (k *kataAgent) getAgentMetrics(ctx context.Context, req *grpc.GetMetricsRequest) (*grpc.Metrics, error) {
resp, err := k.sendReq(ctx, req)
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "GetMetricsRequest timed out")
}
return nil, err
}
@ -2469,6 +2565,9 @@ func (k *kataAgent) getAgentMetrics(ctx context.Context, req *grpc.GetMetricsReq
func (k *kataAgent) getIPTables(ctx context.Context, isIPv6 bool) ([]byte, error) {
resp, err := k.sendReq(ctx, &grpc.GetIPTablesRequest{IsIpv6: isIPv6})
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "GetIPTablesRequest timed out")
}
return nil, err
}
return resp.(*grpc.GetIPTablesResponse).Data, nil
@ -2481,6 +2580,9 @@ func (k *kataAgent) setIPTables(ctx context.Context, isIPv6 bool, data []byte) e
})
if err != nil {
k.Logger().WithError(err).Errorf("setIPTables request to agent failed")
if err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "SetIPTablesRequest timed out")
}
}
return err
@ -2489,6 +2591,9 @@ func (k *kataAgent) setIPTables(ctx context.Context, isIPv6 bool, data []byte) e
func (k *kataAgent) getGuestVolumeStats(ctx context.Context, volumeGuestPath string) ([]byte, error) {
result, err := k.sendReq(ctx, &grpc.VolumeStatsRequest{VolumeGuestPath: volumeGuestPath})
if err != nil {
if err.Error() == context.DeadlineExceeded.Error() {
return nil, status.Errorf(codes.DeadlineExceeded, "VolumeStatsRequest timed out")
}
return nil, err
}
@ -2502,10 +2607,16 @@ func (k *kataAgent) getGuestVolumeStats(ctx context.Context, volumeGuestPath str
func (k *kataAgent) resizeGuestVolume(ctx context.Context, volumeGuestPath string, size uint64) error {
_, err := k.sendReq(ctx, &grpc.ResizeVolumeRequest{VolumeGuestPath: volumeGuestPath, Size: size})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "ResizeVolumeRequest timed out")
}
return err
}
func (k *kataAgent) setPolicy(ctx context.Context, policy string) error {
_, err := k.sendReq(ctx, &grpc.SetPolicyRequest{Policy: policy})
if err != nil && err.Error() == context.DeadlineExceeded.Error() {
return status.Errorf(codes.DeadlineExceeded, "SetPolicyRequest timed out")
}
return err
}

View File

@ -171,7 +171,7 @@ setup() {
# The pod should be failed because the default timeout of CreateContainerRequest is 60s
assert_pod_fail "$pod_config"
assert_logs_contain "$node" kata "$node_start_time" \
'context deadline exceeded'
'CreateContainerRequest timed out'
}
@test "Test we can pull a large image inside the guest with large createcontainer timeout" {