mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-04 12:23:13 +00:00 
			
		
		
		
	Fix shutdown waitgroup panic (#35676)
This PR fixes a panic issue in the WaitGroup that occurs when Gitea is shut down using Ctrl+C. It ensures that all active connection pointers in the server are properly tracked and forcibly closed when the hammer shutdown is invoked. The process remains graceful — the normal shutdown sequence runs before the hammer is triggered, and existing connections are given a timeout period to complete gracefully. This PR also fixes `no logger writer` problem. Now the log close will only be invoked when the command exit. - Fixes #35468 - Fixes #35551 - Fixes #35559 - Replace #35578 --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
		@@ -156,7 +156,6 @@ func serveInstall(cmd *cli.Command) error {
 | 
			
		||||
	case <-graceful.GetManager().IsShutdown():
 | 
			
		||||
		<-graceful.GetManager().Done()
 | 
			
		||||
		log.Info("PID: %d Gitea Web Finished", os.Getpid())
 | 
			
		||||
		log.GetManager().Close()
 | 
			
		||||
		return err
 | 
			
		||||
	default:
 | 
			
		||||
	}
 | 
			
		||||
@@ -231,7 +230,6 @@ func serveInstalled(c *cli.Command) error {
 | 
			
		||||
	err := listen(webRoutes, true)
 | 
			
		||||
	<-graceful.GetManager().Done()
 | 
			
		||||
	log.Info("PID: %d Gitea Web Finished", os.Getpid())
 | 
			
		||||
	log.GetManager().Close()
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								main.go
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								main.go
									
									
									
									
									
								
							@@ -44,6 +44,7 @@ func main() {
 | 
			
		||||
	}
 | 
			
		||||
	app := cmd.NewMainApp(cmd.AppVersion{Version: Version, Extra: formatBuiltWith()})
 | 
			
		||||
	_ = cmd.RunMainApp(app, os.Args...) // all errors should have been handled by the RunMainApp
 | 
			
		||||
	// flush the queued logs before exiting, it is a MUST, otherwise there will be log loss
 | 
			
		||||
	log.GetManager().Close()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,6 @@ import (
 | 
			
		||||
	"os"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"sync"
 | 
			
		||||
	"sync/atomic"
 | 
			
		||||
	"syscall"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
@@ -30,12 +29,15 @@ type ServeFunction = func(net.Listener) error
 | 
			
		||||
 | 
			
		||||
// Server represents our graceful server
 | 
			
		||||
type Server struct {
 | 
			
		||||
	network              string
 | 
			
		||||
	address              string
 | 
			
		||||
	listener             net.Listener
 | 
			
		||||
	wg                   sync.WaitGroup
 | 
			
		||||
	state                state
 | 
			
		||||
	lock                 *sync.RWMutex
 | 
			
		||||
	network  string
 | 
			
		||||
	address  string
 | 
			
		||||
	listener net.Listener
 | 
			
		||||
 | 
			
		||||
	lock          sync.RWMutex
 | 
			
		||||
	state         state
 | 
			
		||||
	connCounter   int64
 | 
			
		||||
	connEmptyCond *sync.Cond
 | 
			
		||||
 | 
			
		||||
	BeforeBegin          func(network, address string)
 | 
			
		||||
	OnShutdown           func()
 | 
			
		||||
	PerWriteTimeout      time.Duration
 | 
			
		||||
@@ -50,14 +52,13 @@ func NewServer(network, address, name string) *Server {
 | 
			
		||||
		log.Info("Starting new %s server: %s:%s on PID: %d", name, network, address, os.Getpid())
 | 
			
		||||
	}
 | 
			
		||||
	srv := &Server{
 | 
			
		||||
		wg:                   sync.WaitGroup{},
 | 
			
		||||
		state:                stateInit,
 | 
			
		||||
		lock:                 &sync.RWMutex{},
 | 
			
		||||
		network:              network,
 | 
			
		||||
		address:              address,
 | 
			
		||||
		PerWriteTimeout:      setting.PerWriteTimeout,
 | 
			
		||||
		PerWritePerKbTimeout: setting.PerWritePerKbTimeout,
 | 
			
		||||
	}
 | 
			
		||||
	srv.connEmptyCond = sync.NewCond(&srv.lock)
 | 
			
		||||
 | 
			
		||||
	srv.BeforeBegin = func(network, addr string) {
 | 
			
		||||
		log.Debug("Starting server on %s:%s (PID: %d)", network, addr, syscall.Getpid())
 | 
			
		||||
@@ -154,7 +155,7 @@ func (srv *Server) Serve(serve ServeFunction) error {
 | 
			
		||||
	GetManager().RegisterServer()
 | 
			
		||||
	err := serve(srv.listener)
 | 
			
		||||
	log.Debug("Waiting for connections to finish... (PID: %d)", syscall.Getpid())
 | 
			
		||||
	srv.wg.Wait()
 | 
			
		||||
	srv.waitForActiveConnections()
 | 
			
		||||
	srv.setState(stateTerminate)
 | 
			
		||||
	GetManager().ServerDone()
 | 
			
		||||
	// use of closed means that the listeners are closed - i.e. we should be shutting down - return nil
 | 
			
		||||
@@ -178,16 +179,62 @@ func (srv *Server) setState(st state) {
 | 
			
		||||
	srv.state = st
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (srv *Server) waitForActiveConnections() {
 | 
			
		||||
	srv.lock.Lock()
 | 
			
		||||
	for srv.connCounter > 0 {
 | 
			
		||||
		srv.connEmptyCond.Wait()
 | 
			
		||||
	}
 | 
			
		||||
	srv.lock.Unlock()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (srv *Server) wrapConnection(c net.Conn) (net.Conn, error) {
 | 
			
		||||
	srv.lock.Lock()
 | 
			
		||||
	defer srv.lock.Unlock()
 | 
			
		||||
 | 
			
		||||
	if srv.state != stateRunning {
 | 
			
		||||
		_ = c.Close()
 | 
			
		||||
		return nil, syscall.EINVAL // same as AcceptTCP
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	srv.connCounter++
 | 
			
		||||
	return &wrappedConn{Conn: c, server: srv}, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (srv *Server) removeConnection(_ *wrappedConn) {
 | 
			
		||||
	srv.lock.Lock()
 | 
			
		||||
	defer srv.lock.Unlock()
 | 
			
		||||
 | 
			
		||||
	srv.connCounter--
 | 
			
		||||
	if srv.connCounter <= 0 {
 | 
			
		||||
		srv.connEmptyCond.Broadcast()
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// closeAllConnections forcefully closes all active connections
 | 
			
		||||
func (srv *Server) closeAllConnections() {
 | 
			
		||||
	srv.lock.Lock()
 | 
			
		||||
	if srv.connCounter > 0 {
 | 
			
		||||
		log.Warn("After graceful shutdown period, %d connections are still active. Forcefully close.", srv.connCounter)
 | 
			
		||||
		srv.connCounter = 0 // OS will close all the connections after the process exits, so we just assume there is no active connection now
 | 
			
		||||
	}
 | 
			
		||||
	srv.lock.Unlock()
 | 
			
		||||
	srv.connEmptyCond.Broadcast()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type filer interface {
 | 
			
		||||
	File() (*os.File, error)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type wrappedListener struct {
 | 
			
		||||
	net.Listener
 | 
			
		||||
	stopped bool
 | 
			
		||||
	server  *Server
 | 
			
		||||
	server *Server
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
	_ net.Listener = (*wrappedListener)(nil)
 | 
			
		||||
	_ filer        = (*wrappedListener)(nil)
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func newWrappedListener(l net.Listener, srv *Server) *wrappedListener {
 | 
			
		||||
	return &wrappedListener{
 | 
			
		||||
		Listener: l,
 | 
			
		||||
@@ -195,46 +242,24 @@ func newWrappedListener(l net.Listener, srv *Server) *wrappedListener {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (wl *wrappedListener) Accept() (net.Conn, error) {
 | 
			
		||||
	var c net.Conn
 | 
			
		||||
	// Set keepalive on TCPListeners connections.
 | 
			
		||||
func (wl *wrappedListener) Accept() (c net.Conn, err error) {
 | 
			
		||||
	if tcl, ok := wl.Listener.(*net.TCPListener); ok {
 | 
			
		||||
		// Set keepalive on TCPListeners connections if possible, see http.tcpKeepAliveListener
 | 
			
		||||
		tc, err := tcl.AcceptTCP()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
		_ = tc.SetKeepAlive(true)                  // see http.tcpKeepAliveListener
 | 
			
		||||
		_ = tc.SetKeepAlivePeriod(3 * time.Minute) // see http.tcpKeepAliveListener
 | 
			
		||||
		_ = tc.SetKeepAlive(true)
 | 
			
		||||
		_ = tc.SetKeepAlivePeriod(3 * time.Minute)
 | 
			
		||||
		c = tc
 | 
			
		||||
	} else {
 | 
			
		||||
		var err error
 | 
			
		||||
		c, err = wl.Listener.Accept()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	closed := int32(0)
 | 
			
		||||
 | 
			
		||||
	c = &wrappedConn{
 | 
			
		||||
		Conn:                 c,
 | 
			
		||||
		server:               wl.server,
 | 
			
		||||
		closed:               &closed,
 | 
			
		||||
		perWriteTimeout:      wl.server.PerWriteTimeout,
 | 
			
		||||
		perWritePerKbTimeout: wl.server.PerWritePerKbTimeout,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	wl.server.wg.Add(1)
 | 
			
		||||
	return c, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (wl *wrappedListener) Close() error {
 | 
			
		||||
	if wl.stopped {
 | 
			
		||||
		return syscall.EINVAL
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	wl.stopped = true
 | 
			
		||||
	return wl.Listener.Close()
 | 
			
		||||
	return wl.server.wrapConnection(c)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (wl *wrappedListener) File() (*os.File, error) {
 | 
			
		||||
@@ -244,17 +269,14 @@ func (wl *wrappedListener) File() (*os.File, error) {
 | 
			
		||||
 | 
			
		||||
type wrappedConn struct {
 | 
			
		||||
	net.Conn
 | 
			
		||||
	server               *Server
 | 
			
		||||
	closed               *int32
 | 
			
		||||
	deadline             time.Time
 | 
			
		||||
	perWriteTimeout      time.Duration
 | 
			
		||||
	perWritePerKbTimeout time.Duration
 | 
			
		||||
	server   *Server
 | 
			
		||||
	deadline time.Time
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (w *wrappedConn) Write(p []byte) (n int, err error) {
 | 
			
		||||
	if w.perWriteTimeout > 0 {
 | 
			
		||||
		minTimeout := time.Duration(len(p)/1024) * w.perWritePerKbTimeout
 | 
			
		||||
		minDeadline := time.Now().Add(minTimeout).Add(w.perWriteTimeout)
 | 
			
		||||
	if w.server.PerWriteTimeout > 0 {
 | 
			
		||||
		minTimeout := time.Duration(len(p)/1024) * w.server.PerWritePerKbTimeout
 | 
			
		||||
		minDeadline := time.Now().Add(minTimeout).Add(w.server.PerWriteTimeout)
 | 
			
		||||
 | 
			
		||||
		w.deadline = w.deadline.Add(minTimeout)
 | 
			
		||||
		if minDeadline.After(w.deadline) {
 | 
			
		||||
@@ -266,19 +288,6 @@ func (w *wrappedConn) Write(p []byte) (n int, err error) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (w *wrappedConn) Close() error {
 | 
			
		||||
	if atomic.CompareAndSwapInt32(w.closed, 0, 1) {
 | 
			
		||||
		defer func() {
 | 
			
		||||
			if err := recover(); err != nil {
 | 
			
		||||
				select {
 | 
			
		||||
				case <-GetManager().IsHammer():
 | 
			
		||||
					// Likely deadlocked request released at hammertime
 | 
			
		||||
					log.Warn("Panic during connection close! %v. Likely there has been a deadlocked request which has been released by forced shutdown.", err)
 | 
			
		||||
				default:
 | 
			
		||||
					log.Error("Panic during connection close! %v", err)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}()
 | 
			
		||||
		w.server.wg.Done()
 | 
			
		||||
	}
 | 
			
		||||
	w.server.removeConnection(w)
 | 
			
		||||
	return w.Conn.Close()
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -5,7 +5,6 @@ package graceful
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"os"
 | 
			
		||||
	"runtime"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/log"
 | 
			
		||||
)
 | 
			
		||||
@@ -48,26 +47,8 @@ func (srv *Server) doShutdown() {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (srv *Server) doHammer() {
 | 
			
		||||
	defer func() {
 | 
			
		||||
		// We call srv.wg.Done() until it panics.
 | 
			
		||||
		// This happens if we call Done() when the WaitGroup counter is already at 0
 | 
			
		||||
		// So if it panics -> we're done, Serve() will return and the
 | 
			
		||||
		// parent will goroutine will exit.
 | 
			
		||||
		if r := recover(); r != nil {
 | 
			
		||||
			log.Error("WaitGroup at 0: Error: %v", r)
 | 
			
		||||
		}
 | 
			
		||||
	}()
 | 
			
		||||
	if srv.getState() != stateShuttingDown {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	log.Warn("Forcefully shutting down parent")
 | 
			
		||||
	for {
 | 
			
		||||
		if srv.getState() == stateTerminate {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		srv.wg.Done()
 | 
			
		||||
 | 
			
		||||
		// Give other goroutines a chance to finish before we forcibly stop them.
 | 
			
		||||
		runtime.Gosched()
 | 
			
		||||
	}
 | 
			
		||||
	srv.closeAllConnections()
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user