From 9a2edbac56070f9c5798f82a09f4ad1ecfb21ffb Mon Sep 17 00:00:00 2001 From: "M. Mert Yildiran" Date: Tue, 17 Aug 2021 14:53:32 +0300 Subject: [PATCH] Bring back the HTTP request-response pair matcher --- tap/api/api.go | 17 +- tap/extensions/amqp/main.go | 3 +- tap/extensions/http/go.mod | 4 +- tap/extensions/http/go.sum | 4 + tap/extensions/http/har_writer.go | 316 ++++++++++++++++++++++++++++++ tap/extensions/http/main.go | 38 +++- tap/extensions/http/matcher.go | 118 +++++++++++ tap/extensions/kafka/main.go | 3 +- tap/tcp_stream.go | 7 - tap/tcp_stream_factory.go | 19 +- 10 files changed, 504 insertions(+), 25 deletions(-) create mode 100644 tap/extensions/http/go.sum create mode 100644 tap/extensions/http/har_writer.go create mode 100644 tap/extensions/http/matcher.go diff --git a/tap/api/api.go b/tap/api/api.go index 885904d2d..31289737b 100644 --- a/tap/api/api.go +++ b/tap/api/api.go @@ -14,8 +14,23 @@ type Extension struct { Dissector Dissector } +type ConnectionInfo struct { + ClientIP string + ClientPort string + ServerIP string + ServerPort string + IsOutgoing bool +} + +type TcpID struct { + SrcIP string + DstIP string + SrcPort string + DstPort string +} + type Dissector interface { Register(*Extension) Ping() - Dissect(b *bufio.Reader, isClient bool) interface{} + Dissect(b *bufio.Reader, isClient bool, tcpID *TcpID) interface{} } diff --git a/tap/extensions/amqp/main.go b/tap/extensions/amqp/main.go index 0c6f233b6..15012e5e6 100644 --- a/tap/extensions/amqp/main.go +++ b/tap/extensions/amqp/main.go @@ -23,10 +23,9 @@ func (g dissecting) Ping() { log.Printf("pong AMQP\n") } -func (g dissecting) Dissect(b *bufio.Reader, isClient bool) interface{} { +func (g dissecting) Dissect(b *bufio.Reader, isClient bool, tcpID *api.TcpID) interface{} { // TODO: Implement return nil } -// exported as symbol named "Greeter" var Dissector dissecting diff --git a/tap/extensions/http/go.mod b/tap/extensions/http/go.mod index 2abb3d0d8..6274daec5 100644 --- a/tap/extensions/http/go.mod +++ b/tap/extensions/http/go.mod @@ -3,7 +3,9 @@ module github.com/up9inc/mizu/tap/extensions/http go 1.16 require ( - github.com/up9inc/mizu/tap/api v0.0.0 + github.com/google/martian v2.1.0+incompatible // indirect + github.com/romana/rlog v0.0.0-20171115192701-f018bc92e7d7 // indirect + github.com/up9inc/mizu/tap/api v0.0.0 ) replace github.com/up9inc/mizu/tap/api v0.0.0 => ../../api diff --git a/tap/extensions/http/go.sum b/tap/extensions/http/go.sum new file mode 100644 index 000000000..f25f09c37 --- /dev/null +++ b/tap/extensions/http/go.sum @@ -0,0 +1,4 @@ +github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/romana/rlog v0.0.0-20171115192701-f018bc92e7d7 h1:jkvpcEatpwuMF5O5LVxTnehj6YZ/aEZN4NWD/Xml4pI= +github.com/romana/rlog v0.0.0-20171115192701-f018bc92e7d7/go.mod h1:KTrHyWpO1sevuXPZwyeZc72ddWRFqNSKDFl7uVWKpg0= diff --git a/tap/extensions/http/har_writer.go b/tap/extensions/http/har_writer.go new file mode 100644 index 000000000..86e52f542 --- /dev/null +++ b/tap/extensions/http/har_writer.go @@ -0,0 +1,316 @@ +package main + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "log" + "net/http" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "time" + + "github.com/google/martian/har" + "github.com/romana/rlog" +) + +var outputLevel int +var errorsMap map[string]uint +var errorsMapMutex sync.Mutex +var nErrors uint +var ownIps []string // global +var hostMode bool // global + +func logError(minOutputLevel int, t string, s string, a ...interface{}) { + errorsMapMutex.Lock() + nErrors++ + nb, _ := errorsMap[t] + errorsMap[t] = nb + 1 + errorsMapMutex.Unlock() + + if outputLevel >= minOutputLevel { + formatStr := fmt.Sprintf("%s: %s", t, s) + rlog.Errorf(formatStr, a...) + } +} +func Error(t string, s string, a ...interface{}) { + logError(0, t, s, a...) +} +func SilentError(t string, s string, a ...interface{}) { + logError(2, t, s, a...) +} +func Debug(s string, a ...interface{}) { + rlog.Debugf(s, a...) +} +func Trace(s string, a ...interface{}) { + rlog.Tracef(1, s, a...) +} + +const readPermission = 0644 +const harFilenameSuffix = ".har" +const tempFilenameSuffix = ".har.tmp" + +type PairChanItem struct { + Request *http.Request + RequestTime time.Time + Response *http.Response + ResponseTime time.Time + RequestSenderIp string + ConnectionInfo *ConnectionInfo +} + +func openNewHarFile(filename string) *HarFile { + file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, readPermission) + if err != nil { + log.Panicf("Failed to open output file: %s (%v,%+v)", err, err, err) + } + + harFile := HarFile{file: file, entryCount: 0} + harFile.writeHeader() + + return &harFile +} + +type HarFile struct { + file *os.File + entryCount int +} + +func NewEntry(request *http.Request, requestTime time.Time, response *http.Response, responseTime time.Time) (*har.Entry, error) { + harRequest, err := har.NewRequest(request, false) + if err != nil { + SilentError("convert-request-to-har", "Failed converting request to HAR %s (%v,%+v)", err, err, err) + return nil, errors.New("Failed converting request to HAR") + } + + // For requests with multipart/form-data or application/x-www-form-urlencoded Content-Type, + // martian/har will parse the request body and place the parameters in harRequest.PostData.Params + // instead of harRequest.PostData.Text (as the HAR spec requires it). + // Mizu currently only looks at PostData.Text. Therefore, instead of letting martian/har set the content of + // PostData, always copy the request body to PostData.Text. + if request.ContentLength > 0 { + reqBody, err := ioutil.ReadAll(request.Body) + if err != nil { + SilentError("read-request-body", "Failed converting request to HAR %s (%v,%+v)", err, err, err) + return nil, errors.New("Failed reading request body") + } + request.Body = ioutil.NopCloser(bytes.NewReader(reqBody)) + harRequest.PostData.Text = string(reqBody) + } + + harResponse, err := har.NewResponse(response, true) + if err != nil { + SilentError("convert-response-to-har", "Failed converting response to HAR %s (%v,%+v)", err, err, err) + return nil, errors.New("Failed converting response to HAR") + } + + if harRequest.PostData != nil && strings.HasPrefix(harRequest.PostData.MimeType, "application/grpc") { + // Force HTTP/2 gRPC into HAR template + + harRequest.URL = fmt.Sprintf("%s://%s%s", request.Header.Get(":scheme"), request.Header.Get(":authority"), request.Header.Get(":path")) + + status, err := strconv.Atoi(response.Header.Get(":status")) + if err != nil { + SilentError("convert-response-status-for-har", "Failed converting status to int %s (%v,%+v)", err, err, err) + return nil, errors.New("Failed converting response status to int for HAR") + } + harResponse.Status = status + } else { + // Martian copies http.Request.URL.String() to har.Request.URL, which usually contains the path. + // However, according to the HAR spec, the URL field needs to be the absolute URL. + var scheme string + if request.URL.Scheme != "" { + scheme = request.URL.Scheme + } else { + scheme = "http" + } + harRequest.URL = fmt.Sprintf("%s://%s%s", scheme, request.Host, request.URL) + } + + totalTime := responseTime.Sub(requestTime).Round(time.Millisecond).Milliseconds() + if totalTime < 1 { + totalTime = 1 + } + + harEntry := har.Entry{ + StartedDateTime: time.Now().UTC(), + Time: totalTime, + Request: harRequest, + Response: harResponse, + Cache: &har.Cache{}, + Timings: &har.Timings{ + Send: -1, + Wait: -1, + Receive: totalTime, + }, + } + + return &harEntry, nil +} + +func (f *HarFile) WriteEntry(harEntry *har.Entry) { + harEntryJson, err := json.Marshal(harEntry) + if err != nil { + SilentError("har-entry-marshal", "Failed converting har entry object to JSON%s (%v,%+v)", err, err, err) + return + } + + var separator string + if f.GetEntryCount() > 0 { + separator = "," + } else { + separator = "" + } + + harEntryString := append([]byte(separator), harEntryJson...) + + if _, err := f.file.Write(harEntryString); err != nil { + log.Panicf("Failed to write to output file: %s (%v,%+v)", err, err, err) + } + + f.entryCount++ +} + +func (f *HarFile) GetEntryCount() int { + return f.entryCount +} + +func (f *HarFile) Close() { + f.writeTrailer() + + err := f.file.Close() + if err != nil { + log.Panicf("Failed to close output file: %s (%v,%+v)", err, err, err) + } +} + +func (f *HarFile) writeHeader() { + header := []byte(`{"log": {"version": "1.2", "creator": {"name": "Mizu", "version": "0.0.1"}, "entries": [`) + if _, err := f.file.Write(header); err != nil { + log.Panicf("Failed to write header to output file: %s (%v,%+v)", err, err, err) + } +} + +func (f *HarFile) writeTrailer() { + trailer := []byte("]}}") + if _, err := f.file.Write(trailer); err != nil { + log.Panicf("Failed to write trailer to output file: %s (%v,%+v)", err, err, err) + } +} + +func NewHarWriter(outputDir string, maxEntries int) *HarWriter { + return &HarWriter{ + OutputDirPath: outputDir, + MaxEntries: maxEntries, + PairChan: make(chan *PairChanItem), + OutChan: make(chan *OutputChannelItem, 1000), + currentFile: nil, + done: make(chan bool), + } +} + +type ConnectionInfo struct { + ClientIP string + ClientPort string + ServerIP string + ServerPort string + IsOutgoing bool +} + +type OutputChannelItem struct { + HarEntry *har.Entry + ConnectionInfo *ConnectionInfo + ValidationRulesChecker string +} + +type HarWriter struct { + OutputDirPath string + MaxEntries int + PairChan chan *PairChanItem + OutChan chan *OutputChannelItem + currentFile *HarFile + done chan bool +} + +func (hw *HarWriter) WritePair(request *http.Request, requestTime time.Time, response *http.Response, responseTime time.Time, connectionInfo *ConnectionInfo) { + hw.PairChan <- &PairChanItem{ + Request: request, + RequestTime: requestTime, + Response: response, + ResponseTime: responseTime, + ConnectionInfo: connectionInfo, + } +} + +func (hw *HarWriter) Start() { + if hw.OutputDirPath != "" { + if err := os.MkdirAll(hw.OutputDirPath, os.ModePerm); err != nil { + log.Panicf("Failed to create output directory: %s (%v,%+v)", err, err, err) + } + } + + go func() { + for pair := range hw.PairChan { + harEntry, err := NewEntry(pair.Request, pair.RequestTime, pair.Response, pair.ResponseTime) + if err != nil { + continue + } + + if hw.OutputDirPath != "" { + if hw.currentFile == nil { + hw.openNewFile() + } + + hw.currentFile.WriteEntry(harEntry) + + if hw.currentFile.GetEntryCount() >= hw.MaxEntries { + hw.closeFile() + } + } else { + hw.OutChan <- &OutputChannelItem{ + HarEntry: harEntry, + ConnectionInfo: pair.ConnectionInfo, + } + } + } + + if hw.currentFile != nil { + hw.closeFile() + } + hw.done <- true + }() +} + +func (hw *HarWriter) Stop() { + close(hw.PairChan) + <-hw.done + close(hw.OutChan) +} + +func (hw *HarWriter) openNewFile() { + filename := buildFilename(hw.OutputDirPath, time.Now(), tempFilenameSuffix) + hw.currentFile = openNewHarFile(filename) +} + +func (hw *HarWriter) closeFile() { + hw.currentFile.Close() + tmpFilename := hw.currentFile.file.Name() + hw.currentFile = nil + + filename := buildFilename(hw.OutputDirPath, time.Now(), harFilenameSuffix) + err := os.Rename(tmpFilename, filename) + if err != nil { + SilentError("Rename-file", "cannot rename file: %s (%v,%+v)", err, err, err) + } +} + +func buildFilename(dir string, t time.Time, suffix string) string { + // (epoch time in nanoseconds)__(YYYY_Month_DD__hh-mm-ss).har + filename := fmt.Sprintf("%d__%s%s", t.UnixNano(), t.Format("2006_Jan_02__15-04-05"), suffix) + return filepath.Join(dir, filename) +} diff --git a/tap/extensions/http/main.go b/tap/extensions/http/main.go index bd819eaf0..b83cb55cd 100644 --- a/tap/extensions/http/main.go +++ b/tap/extensions/http/main.go @@ -2,20 +2,25 @@ package main import ( "bufio" + "fmt" "io" "io/ioutil" "log" "net/http" + "time" "github.com/up9inc/mizu/tap/api" ) +var requestCounter uint +var responseCounter uint + func init() { log.Println("Initializing HTTP extension.") + requestCounter = 0 + responseCounter = 0 } -var discardBuffer = make([]byte, 4096) - type dissecting string func (g dissecting) Register(extension *api.Extension) { @@ -28,12 +33,12 @@ func (g dissecting) Ping() { log.Printf("pong HTTP\n") } -func (g dissecting) Dissect(b *bufio.Reader, isClient bool) interface{} { +func (g dissecting) Dissect(b *bufio.Reader, isClient bool, tcpID *api.TcpID) interface{} { for { if isClient { + requestCounter++ req, err := http.ReadRequest(b) if err == io.EOF || err == io.ErrUnexpectedEOF { - // We must read until we see an EOF... very important! return nil } else if err != nil { log.Println("Error reading stream:", err) @@ -42,10 +47,20 @@ func (g dissecting) Dissect(b *bufio.Reader, isClient bool) interface{} { req.Body.Close() log.Printf("Received request: %+v with body: %+v\n", req, body) } + + ident := fmt.Sprintf( + "%s->%s %s->%s %d", + tcpID.SrcIP, + tcpID.DstIP, + tcpID.SrcPort, + tcpID.DstPort, + requestCounter, + ) + reqResMatcher.registerRequest(ident, req, time.Now()) } else { + responseCounter++ res, err := http.ReadResponse(b, nil) if err == io.EOF || err == io.ErrUnexpectedEOF { - // We must read until we see an EOF... very important! return nil } else if err != nil { log.Println("Error reading stream:", err) @@ -54,9 +69,20 @@ func (g dissecting) Dissect(b *bufio.Reader, isClient bool) interface{} { res.Body.Close() log.Printf("Received response: %+v with body: %+v\n", res, body) } + ident := fmt.Sprintf( + "%s->%s %s->%s %d", + tcpID.DstIP, + tcpID.SrcIP, + tcpID.DstPort, + tcpID.SrcPort, + responseCounter, + ) + reqResPair := reqResMatcher.registerResponse(ident, res, time.Now()) + if reqResPair != nil { + log.Printf("YES REQRES MATCHED!\n") + } } } } -// exported as symbol named "Greeter" var Dissector dissecting diff --git a/tap/extensions/http/matcher.go b/tap/extensions/http/matcher.go new file mode 100644 index 000000000..fa4db76ec --- /dev/null +++ b/tap/extensions/http/matcher.go @@ -0,0 +1,118 @@ +package main + +import ( + "fmt" + "net/http" + "strings" + "sync" + "time" +) + +var reqResMatcher = createResponseRequestMatcher() // global + +type requestResponsePair struct { + Request httpMessage `json:"request"` + Response httpMessage `json:"response"` +} + +type httpMessage struct { + isRequest bool + captureTime time.Time + orig interface{} +} + +// Key is {client_addr}:{client_port}->{dest_addr}:{dest_port} +type requestResponseMatcher struct { + openMessagesMap sync.Map +} + +func createResponseRequestMatcher() requestResponseMatcher { + newMatcher := &requestResponseMatcher{openMessagesMap: sync.Map{}} + return *newMatcher +} + +func (matcher *requestResponseMatcher) registerRequest(ident string, request *http.Request, captureTime time.Time) *requestResponsePair { + split := splitIdent(ident) + key := genKey(split) + // fmt.Printf(">>> request key: %v\n", key) + + requestHTTPMessage := httpMessage{ + isRequest: true, + captureTime: captureTime, + orig: request, + } + + if response, found := matcher.openMessagesMap.LoadAndDelete(key); found { + // Type assertion always succeeds because all of the map's values are of httpMessage type + responseHTTPMessage := response.(*httpMessage) + if responseHTTPMessage.isRequest { + SilentError("Request-Duplicate", "Got duplicate request with same identifier") + return nil + } + Trace("Matched open Response for %s", key) + return matcher.preparePair(&requestHTTPMessage, responseHTTPMessage) + } + + matcher.openMessagesMap.Store(key, &requestHTTPMessage) + Trace("Registered open Request for %s", key) + return nil +} + +func (matcher *requestResponseMatcher) registerResponse(ident string, response *http.Response, captureTime time.Time) *requestResponsePair { + split := splitIdent(ident) + key := genKey(split) + // fmt.Printf(">>> response key: %v\n", key) + + responseHTTPMessage := httpMessage{ + isRequest: false, + captureTime: captureTime, + orig: response, + } + + if request, found := matcher.openMessagesMap.LoadAndDelete(key); found { + // Type assertion always succeeds because all of the map's values are of httpMessage type + requestHTTPMessage := request.(*httpMessage) + if !requestHTTPMessage.isRequest { + SilentError("Response-Duplicate", "Got duplicate response with same identifier") + return nil + } + Trace("Matched open Request for %s", key) + return matcher.preparePair(requestHTTPMessage, &responseHTTPMessage) + } + + matcher.openMessagesMap.Store(key, &responseHTTPMessage) + Trace("Registered open Response for %s", key) + return nil +} + +func (matcher *requestResponseMatcher) preparePair(requestHTTPMessage *httpMessage, responseHTTPMessage *httpMessage) *requestResponsePair { + return &requestResponsePair{ + Request: *requestHTTPMessage, + Response: *responseHTTPMessage, + } +} + +func splitIdent(ident string) []string { + ident = strings.Replace(ident, "->", " ", -1) + return strings.Split(ident, " ") +} + +func genKey(split []string) string { + key := fmt.Sprintf("%s:%s->%s:%s,%s", split[0], split[2], split[1], split[3], split[4]) + return key +} + +func (matcher *requestResponseMatcher) deleteOlderThan(t time.Time) int { + numDeleted := 0 + + matcher.openMessagesMap.Range(func(key interface{}, value interface{}) bool { + message, _ := value.(*httpMessage) + if message.captureTime.Before(t) { + matcher.openMessagesMap.Delete(key) + numDeleted++ + } + return true + }) + + return numDeleted +} diff --git a/tap/extensions/kafka/main.go b/tap/extensions/kafka/main.go index 21179d183..10e63195a 100644 --- a/tap/extensions/kafka/main.go +++ b/tap/extensions/kafka/main.go @@ -23,10 +23,9 @@ func (g dissecting) Ping() { log.Printf("pong Kafka\n") } -func (g dissecting) Dissect(b *bufio.Reader, isClient bool) interface{} { +func (g dissecting) Dissect(b *bufio.Reader, isClient bool, tcpID *api.TcpID) interface{} { // TODO: Implement return nil } -// exported as symbol named "Greeter" var Dissector dissecting diff --git a/tap/tcp_stream.go b/tap/tcp_stream.go index 4cbfcf9db..9453423f5 100644 --- a/tap/tcp_stream.go +++ b/tap/tcp_stream.go @@ -7,13 +7,6 @@ import ( "github.com/google/gopacket/tcpassembly/tcpreader" ) -type tcpID struct { - srcIP string - dstIP string - srcPort string - dstPort string -} - type tcpReaderDataMsg struct { bytes []byte timestamp time.Time diff --git a/tap/tcp_stream_factory.go b/tap/tcp_stream_factory.go index 357ca59ac..39dcddb32 100644 --- a/tap/tcp_stream_factory.go +++ b/tap/tcp_stream_factory.go @@ -6,6 +6,7 @@ import ( "sync" "github.com/romana/rlog" + "github.com/up9inc/mizu/tap/api" "github.com/google/gopacket" // pulls in all layers decoders "github.com/google/gopacket/tcpassembly" @@ -29,22 +30,22 @@ func containsPort(ports []string, port string) bool { return false } -func (h *tcpStream) clientRun() { +func (h *tcpStream) clientRun(tcpID *api.TcpID) { b := bufio.NewReader(&h.r) for _, extension := range extensions { if containsPort(extension.OutboundPorts, h.transport.Dst().String()) { extension.Dissector.Ping() - extension.Dissector.Dissect(b, true) + extension.Dissector.Dissect(b, true, tcpID) } } } -func (h *tcpStream) serverRun() { +func (h *tcpStream) serverRun(tcpID *api.TcpID) { b := bufio.NewReader(&h.r) for _, extension := range extensions { if containsPort(extension.OutboundPorts, h.transport.Src().String()) { extension.Dissector.Ping() - extension.Dissector.Dissect(b, false) + extension.Dissector.Dissect(b, false, tcpID) } } } @@ -56,10 +57,16 @@ func (h *tcpStreamFactory) New(net, transport gopacket.Flow) tcpassembly.Stream transport: transport, r: tcpreader.NewReaderStream(), } + tcpID := &api.TcpID{ + SrcIP: net.Src().String(), + DstIP: net.Dst().String(), + SrcPort: transport.Src().String(), + DstPort: transport.Dst().String(), + } if containsPort(allOutboundPorts, transport.Dst().String()) { - go stream.clientRun() + go stream.clientRun(tcpID) } else if containsPort(allOutboundPorts, transport.Src().String()) { - go stream.serverRun() + go stream.serverRun(tcpID) } return &stream.r }