Handle worker restarts without stopping deployments

This commit is contained in:
Trevor Slocum 2023-04-16 09:50:29 -07:00
parent 588877e6ee
commit 49d2a8f1c7
5 changed files with 162 additions and 24 deletions

View File

@ -5,12 +5,15 @@ import (
"log"
"net"
"os"
"time"
"code.rocketnine.space/tslocum/beehive"
)
var worker *beehive.Worker
const retryDelay = time.Second * 2
func main() {
var (
configPath string
@ -29,12 +32,29 @@ func main() {
}
}
log.Println("Verifying Docker is running and available...")
for {
_, _, err := beehive.Docker("/", []string{"ps"})
if err != nil {
log.Printf("Failed to connect to Docker daemon: %s", err)
time.Sleep(retryDelay)
continue
}
break
}
log.Println("Connected to Docker successfully")
worker = beehive.NewWorker(config.ID, config.IP, config.Festoons, config.Deployments)
conn, err := net.Dial("tcp", config.Queen)
if err != nil {
println("Dial failed:", err.Error())
os.Exit(1)
var conn net.Conn
for {
conn, err = net.Dial("tcp", config.Queen)
if err != nil {
log.Printf("Failed to connect to queen: %s", err)
time.Sleep(retryDelay)
continue
}
break
}
client := beehive.NewClient(conn)

View File

@ -15,6 +15,84 @@ import (
"time"
)
// DeploymentStatus identifies the kind of Docker container lifecycle
// event observed for a deployment.
type DeploymentStatus int

// Note: Entries must only be appended to this list. Status values are
// serialized as integers (see the worker's event reporting), so
// reordering or inserting entries would change the meaning of
// previously recorded values.
const (
	StatusUnknown DeploymentStatus = iota
	StatusAttach
	StatusCommit
	StatusCopy
	StatusCreate
	StatusDestroy
	StatusDetach
	StatusDie
	StatusExecCreate
	StatusExecDetach
	StatusExecDie
	StatusExecStart
	StatusExport
	StatusHealthStatus
	StatusKill
	StatusOOM
	StatusPause
	StatusRename
	StatusResize
	StatusRestart
	StatusStart
	StatusStop
	StatusTop
	StatusUnpause
	StatusUpdate
)

// DeploymentStatusLabels maps the status strings emitted by Docker
// container events to their DeploymentStatus values.
var DeploymentStatusLabels = map[string]DeploymentStatus{
	"attach":        StatusAttach,
	"commit":        StatusCommit,
	"copy":          StatusCopy,
	"create":        StatusCreate,
	"destroy":       StatusDestroy,
	"detach":        StatusDetach,
	"die":           StatusDie,
	"exec_create":   StatusExecCreate,
	"exec_detach":   StatusExecDetach,
	"exec_die":      StatusExecDie,
	"exec_start":    StatusExecStart,
	"export":        StatusExport,
	"health_status": StatusHealthStatus,
	"kill":          StatusKill,
	"oom":           StatusOOM,
	"pause":         StatusPause,
	"rename":        StatusRename,
	"resize":        StatusResize,
	"restart":       StatusRestart,
	"start":         StatusStart,
	"stop":          StatusStop,
	"top":           StatusTop,
	"unpause":       StatusUnpause,
	"update":        StatusUpdate,
}

// RecordedDeploymentStatuses lists the statuses that are recorded as
// DeploymentEvents; events with any other status are discarded.
var RecordedDeploymentStatuses = []DeploymentStatus{
	StatusCreate,
	StatusDestroy,
	StatusDie,
	StatusKill,
	StatusOOM,
	StatusRestart,
	StatusStart,
	StatusStop,
}

// ParseDeploymentStatus translates a Docker event status string into a
// DeploymentStatus. Unrecognized strings yield StatusUnknown.
func ParseDeploymentStatus(status string) DeploymentStatus {
	if parsed, ok := DeploymentStatusLabels[status]; ok {
		return parsed
	}
	return StatusUnknown
}

// DeploymentEvent records a single observed status change of a deployment.
type DeploymentEvent struct {
	Time   int64 // Unix timestamp (seconds) when the event was observed.
	Status DeploymentStatus
}
type Deployment struct {
ID int
@ -30,6 +108,8 @@ type Deployment struct {
Ports []int
Worker *Worker
Events []DeploymentEvent
}
// replacementPort matches port placeholder tokens of the form
// HOSTALGIA_PORT_A through HOSTALGIA_PORT_Z. NOTE(review): presumably
// these placeholders are substituted with assigned host ports while
// copying deployment templates — confirm against interpolateAndCopy.
var replacementPort = regexp.MustCompile(`(HOSTALGIA_PORT_[A-Z])`)
@ -117,6 +197,19 @@ func (d *Deployment) deploy() error {
}
} else if !fileInfo.IsDir() {
return fmt.Errorf("invalid output directory: %s", d.Dir())
} else {
_, err = os.Stat(path.Join(d.Dir(), "docker-compose.yml"))
if err != nil {
if !os.IsNotExist(err) {
log.Fatalf("failed to check for existing docker-compose.yml: %s", err)
}
} else {
log.Printf("Stopping deployment %s...", d.Label())
_, _, err = DockerCompose(d.Dir(), []string{"stop"})
if err != nil {
log.Printf("failed to stop running deployment: %s", err)
}
}
}
err = d.interpolateAndCopy(path.Join(festoonPath, "docker-compose.yml"), path.Join(d.Dir(), "docker-compose.yml"))
@ -158,17 +251,17 @@ func (d *Deployment) deploy() error {
}
}
stdOut, stdErr, err := DockerCompose(d.Dir(), []string{"up", "-d"})
log.Printf("Starting deployment %s...", d.Label())
_, stdErr, err := DockerCompose(d.Dir(), []string{"up", "-d"})
if bytes.Contains(stdErr, []byte(fmt.Sprintf("%s is up-to-date", d.Label()))) {
log.Printf("Warning: %s was already up", d.Label())
d.Events = append(d.Events, DeploymentEvent{
Time: time.Now().Unix(),
Status: StatusAttach,
})
} else if err != nil {
return fmt.Errorf("failed to bring deployment up: %s", err)
}
log.Printf("docker compose stdOut: %s", stdOut)
log.Printf("docker compose stdErr: %s", stdErr)
log.Println("deployment UP!")
return nil
}
@ -212,7 +305,31 @@ func (d *Deployment) handleEvents() {
if l == 10 {
continue
}
log.Println("Container status", string(b[5:l-5]))
statusString := string(b[5 : l-5])
status := ParseDeploymentStatus(statusString)
if status == StatusUnknown {
log.Printf("Warning: Deployment %s has unknown status %s", d.Label(), statusString)
}
var recordEvent bool
for _, recordStatus := range RecordedDeploymentStatuses {
if status == recordStatus {
recordEvent = true
break
}
}
if !recordEvent {
continue
}
event := DeploymentEvent{
Time: time.Now().Unix(),
Status: status,
}
d.Events = append(d.Events, event)
}
if scanner.Err() != nil {
log.Fatal("scanner error", scanner.Err())

View File

@ -87,6 +87,8 @@ func (s *Server) sendTestingTask(c *Client) {
t = NewTask(TaskDeploy, parameters)
s.sendTask(c.Worker, t)
time.Sleep(time.Second * 10)
t = NewTask(TaskHealth, map[string]string{
"time": fmt.Sprintf("%d", time.Now().UnixNano()),
})

View File

@ -5,7 +5,6 @@ import (
"context"
"errors"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
@ -30,7 +29,7 @@ func Serialize(object interface{}, p string) error {
os.MkdirAll(path.Dir(p), 0)
err = ioutil.WriteFile(p, out, 0600)
err = os.WriteFile(p, out, 0600)
if err != nil {
return fmt.Errorf("failed to write to %s: %s", p, err)
}
@ -49,7 +48,7 @@ func Deserialize(object interface{}, path string) error {
return nil
}
configData, err := ioutil.ReadFile(path)
configData, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read file: %s", err)
}

View File

@ -7,6 +7,7 @@ import (
"fmt"
"log"
"strconv"
"time"
)
type Worker struct {
@ -76,17 +77,15 @@ func (w *Worker) HandleRead(c *Client) {
})
for _, d := range w.Deployments {
stdOut, stdErr, err := Docker("", []string{"ps", "--filter", "name=" + d.Label(), "--format", "{{ .State }}"})
eventsJson, err := json.Marshal(d.Events)
if err != nil {
log.Printf("failed to check status of deployment %d: %s", d.Label(), err)
log.Fatal(err)
}
if len(stdErr) > 0 {
log.Printf("failed to check status of deployment %d: %s", d.Label(), stdErr)
} else if len(stdOut) > 0 {
// TODO validate stdout matches status
result.Parameters[fmt.Sprintf("status_%d", d.ID)] = string(stdOut)
}
result.Parameters[fmt.Sprintf("events_%d", d.ID)] = string(eventsJson)
d.Events = d.Events[:0]
// TODO deployment mutex
}
resultJson, err := json.Marshal(result)
@ -123,13 +122,14 @@ func (w *Worker) HandleRead(c *Client) {
Worker: w,
}
go d.handleEvents()
time.Sleep(10 * time.Millisecond) // Give events handler some time to attach.
err = d.deploy()
if err != nil {
log.Fatalf("failed to deploy %+v: %s", d, err)
}
go d.handleEvents()
w.Deployments = append(w.Deployments, d)
// Send result