TRA-3842 daemon mode (#427)

* Update config.go, tapConfig.go, and models.go

* WIP

* Update go.sum

* Update tapRunner.go

* Update tap.go

* WIP

* WIP

* Update Dockerfile, main.go, and 2 more files...

* WIP

* Update utils.go, tapClusterResourceManagement.go, and utils.go

* Merge branch 'develop'

* Update metadata_controller.go, utils.go, and 2 more files...

* Update main.go, utils.go, and tapRunner.go

* Update tapRunner.go

* Update config.go, config.go, and models.go

* Update main.go, main.go, and stats_provider_test.go

* Update provider.go

* bug fixes

* Update main.go, metadata_controller.go, and 13 more files...

* Update metadata_controller.go, status_controller.go, and 4 more files...

* Update main.go, config.go, and 3 more files...

* Update tapRunner.go

* Update config.go, stats_provider_test.go, and consts.go
This commit is contained in:
RamiBerm
2021-11-04 11:46:45 +02:00
committed by GitHub
parent 8af2e562f8
commit a3ec5d147e
31 changed files with 759 additions and 189 deletions

View File

@@ -1,14 +1,20 @@
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"github.com/up9inc/mizu/shared/kubernetes"
"io/ioutil"
v1 "k8s.io/api/core/v1"
"mizuserver/pkg/api"
"mizuserver/pkg/config"
"mizuserver/pkg/controllers"
"mizuserver/pkg/database"
"mizuserver/pkg/models"
"mizuserver/pkg/providers"
"mizuserver/pkg/routes"
"mizuserver/pkg/up9"
"mizuserver/pkg/utils"
@@ -19,6 +25,7 @@ import (
"path/filepath"
"plugin"
"sort"
"syscall"
"time"
"github.com/gin-contrib/static"
@@ -45,6 +52,7 @@ var extensionsMap map[string]*tapApi.Extension // global
// Tunables for the websocket connection that is dialed to the api server.
const (
	// socketConnectionRetries is the retry amount handed to dialSocketWithRetry.
	socketConnectionRetries = 10

	// socketConnectionRetryDelay is the pause between failed dial attempts.
	socketConnectionRetryDelay = 2 * time.Second

	// socketHandshakeTimeout is used as the HandshakeTimeout of the websocket dialer.
	socketHandshakeTimeout = 2 * time.Second
)
func main() {
@@ -101,6 +109,7 @@ func main() {
go pipeTapChannelToSocket(socketConnection, filteredOutputItemsChannel)
} else if *apiServerMode {
database.InitDataBase(config.Config.AgentDatabasePath)
api.StartResolving(*namespace)
outputItemsChannel := make(chan *tapApi.OutputChannelItem)
@@ -197,6 +206,15 @@ func hostApi(socketHarOutputChannel chan<- *tapApi.OutputChannelItem) {
routes.StatusRoutes(app)
routes.NotFoundRoute(app)
if config.Config.DaemonMode {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if _, err := startMizuTapperSyncer(ctx); err != nil {
logger.Log.Fatalf("error initializing tapper syncer: %+v", err)
}
}
utils.StartServer(app)
}
@@ -296,6 +314,15 @@ func pipeTapChannelToSocket(connection *websocket.Conn, messageDataChannel <-cha
err = connection.WriteMessage(websocket.TextMessage, marshaledData)
if err != nil {
logger.Log.Errorf("error sending message through socket server %v, err: %s, (%v,%+v)", messageData, err, err, err)
if errors.Is(err, syscall.EPIPE) {
logger.Log.Warning("detected socket disconnection, reestablishing socket connection")
connection, err = dialSocketWithRetry(*apiServerAddress, socketConnectionRetries, socketConnectionRetryDelay)
if err != nil {
logger.Log.Fatalf("error reestablishing socket connection: %v", err)
} else {
logger.Log.Info("recovered connection successfully")
}
}
continue
}
}
@@ -326,17 +353,77 @@ func determineLogLevel() (logLevel logging.Level) {
func dialSocketWithRetry(socketAddress string, retryAmount int, retryDelay time.Duration) (*websocket.Conn, error) {
var lastErr error
dialer := &websocket.Dialer{ // we use our own dialer instead of the default due to the default's 45 sec handshake timeout, we occasionally encounter hanging socket handshakes when tapper tries to connect to api too soon
Proxy: http.ProxyFromEnvironment,
HandshakeTimeout: socketHandshakeTimeout,
}
for i := 1; i < retryAmount; i++ {
socketConnection, _, err := websocket.DefaultDialer.Dial(socketAddress, nil)
socketConnection, _, err := dialer.Dial(socketAddress, nil)
if err != nil {
if i < retryAmount {
logger.Log.Debugf("socket connection to %s failed: %v, retrying %d out of %d in %d seconds...", socketAddress, err, i, retryAmount, retryDelay / time.Second)
logger.Log.Infof("socket connection to %s failed: %v, retrying %d out of %d in %d seconds...", socketAddress, err, i, retryAmount, retryDelay / time.Second)
time.Sleep(retryDelay)
}
} else {
logger.Log.Debugf("socket connection to %s successful", socketAddress)
return socketConnection, nil
}
}
return nil, lastErr
}
}
// startMizuTapperSyncer builds an in-cluster kubernetes provider, starts a
// tapper syncer configured from config.Config, and spawns a goroutine that
// consumes the syncer's events.
//
// The goroutine stops when ctx is done or when either syncer channel is closed.
// An error received on ErrorOut is logged through Fatalf. Every pod change
// serializes the currently tapped pods, broadcasts them to browser clients and
// stores them in providers.TapStatus.
//
// The started syncer is returned, or the error from creating the provider or
// the syncer.
func startMizuTapperSyncer(ctx context.Context) (*kubernetes.MizuTapperSyncer, error) {
	kubernetesProvider, err := kubernetes.NewProviderInCluster()
	if err != nil {
		return nil, err
	}

	syncerConfig := kubernetes.TapperSyncerConfig{
		TargetNamespaces:         config.Config.TargetNamespaces,
		PodFilterRegex:           config.Config.TapTargetRegex.Regexp,
		MizuResourcesNamespace:   config.Config.MizuResourcesNamespace,
		AgentImage:               config.Config.AgentImage,
		TapperResources:          config.Config.TapperResources,
		ImagePullPolicy:          v1.PullPolicy(config.Config.PullPolicy),
		DumpLogs:                 config.Config.DumpLogs,
		IgnoredUserAgents:        config.Config.IgnoredUserAgents,
		MizuApiFilteringOptions:  config.Config.MizuApiFilteringOptions,
		MizuServiceAccountExists: true, //assume service account exists since daemon mode will not function without it anyway
	}
	syncer, err := kubernetes.CreateAndStartMizuTapperSyncer(ctx, kubernetesProvider, syncerConfig)
	if err != nil {
		return nil, err
	}

	// publishTapStatus pushes the syncer's current tapped pods to browser
	// clients and records them in the shared tap status.
	publishTapStatus := func() {
		status := shared.TapStatus{Pods: kubernetes.GetPodInfosForPods(syncer.CurrentlyTappedPods)}
		payload, marshalErr := json.Marshal(shared.CreateWebSocketStatusMessage(status))
		if marshalErr != nil {
			logger.Log.Fatalf("error serializing tap status: %v", marshalErr)
		}
		api.BroadcastToBrowserClients(payload)
		providers.TapStatus.Pods = status.Pods
	}

	// handle syncer events (pod changes and errors)
	go func() {
		for {
			select {
			case syncerErr, open := <-syncer.ErrorOut:
				if !open {
					logger.Log.Debug("mizuTapperSyncer err channel closed, ending listener loop")
					return
				}
				logger.Log.Fatalf("fatal tap syncer error: %v", syncerErr)
			case _, open := <-syncer.TapPodChangesOut:
				if !open {
					logger.Log.Debug("mizuTapperSyncer pod changes channel closed, ending listener loop")
					return
				}
				publishTapStatus()
			case <-ctx.Done():
				logger.Log.Debug("mizuTapperSyncer event listener loop exiting due to context done")
				return
			}
		}
	}()

	return syncer, nil
}