Handle worker restarts without stopping deployments
This commit is contained in:
parent
588877e6ee
commit
49d2a8f1c7
|
@ -5,12 +5,15 @@ import (
|
|||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"code.rocketnine.space/tslocum/beehive"
|
||||
)
|
||||
|
||||
// worker is the package-level Worker for this process. main assigns it the
// result of beehive.NewWorker, built from the config ID, IP, Festoons and
// Deployments values.
var worker *beehive.Worker
|
||||
|
||||
// retryDelay is how long main sleeps after a failed attempt to reach the
// Docker daemon, or to dial the queen, before trying again.
const retryDelay = time.Second * 2
|
||||
|
||||
func main() {
|
||||
var (
|
||||
configPath string
|
||||
|
@ -29,12 +32,29 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
log.Println("Verifying Docker is running and available...")
|
||||
for {
|
||||
_, _, err := beehive.Docker("/", []string{"ps"})
|
||||
if err != nil {
|
||||
log.Printf("Failed to connect to Docker daemon: %s", err)
|
||||
time.Sleep(retryDelay)
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
log.Println("Connected to Docker successfully")
|
||||
|
||||
worker = beehive.NewWorker(config.ID, config.IP, config.Festoons, config.Deployments)
|
||||
|
||||
conn, err := net.Dial("tcp", config.Queen)
|
||||
if err != nil {
|
||||
println("Dial failed:", err.Error())
|
||||
os.Exit(1)
|
||||
var conn net.Conn
|
||||
for {
|
||||
conn, err = net.Dial("tcp", config.Queen)
|
||||
if err != nil {
|
||||
log.Printf("Failed to connect to queen: %s", err)
|
||||
time.Sleep(retryDelay)
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
client := beehive.NewClient(conn)
|
||||
|
|
131
deployment.go
131
deployment.go
|
@ -15,6 +15,84 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
// DeploymentStatus identifies the kind of status reported for a deployment.
// Its values are the Status* constants; the zero value is StatusUnknown.
// NOTE(review): the labels in DeploymentStatusLabels look like Docker
// container event actions - confirm against the events source.
type DeploymentStatus int
|
||||
|
||||
// Known deployment statuses. StatusUnknown is the zero value and is what a
// failed label lookup produces. The remaining values are numbered by iota in
// declaration order, so inserting or reordering entries would renumber the
// existing statuses.
// NOTE(review): DeploymentEvent values are JSON-marshalled with the status as
// an integer, so the numbering presumably has to stay stable for whoever
// decodes them - confirm.
// Note: Entries must only be appended to this list.
|
||||
const (
|
||||
StatusUnknown DeploymentStatus = iota
|
||||
StatusAttach
|
||||
StatusCommit
|
||||
StatusCopy
|
||||
StatusCreate
|
||||
StatusDestroy
|
||||
StatusDetach
|
||||
StatusDie
|
||||
StatusExecCreate
|
||||
StatusExecDetach
|
||||
StatusExecDie
|
||||
StatusExecStart
|
||||
StatusExport
|
||||
StatusHealthStatus
|
||||
StatusKill
|
||||
StatusOOM
|
||||
StatusPause
|
||||
StatusRename
|
||||
StatusResize
|
||||
StatusRestart
|
||||
StatusStart
|
||||
StatusStop
|
||||
StatusTop
|
||||
StatusUnpause
|
||||
StatusUpdate
|
||||
|
||||
)
|
||||
|
||||
// DeploymentStatusLabels maps a textual status label to its DeploymentStatus.
// ParseDeploymentStatus looks labels up here. StatusUnknown deliberately has
// no entry: it is what a lookup of an unlisted label returns.
var DeploymentStatusLabels = map[string]DeploymentStatus{
|
||||
"attach": StatusAttach,
|
||||
"commit": StatusCommit,
|
||||
"copy": StatusCopy,
|
||||
"create": StatusCreate,
|
||||
"destroy": StatusDestroy,
|
||||
"detach": StatusDetach,
|
||||
"die": StatusDie,
|
||||
"exec_create": StatusExecCreate,
|
||||
"exec_detach": StatusExecDetach,
|
||||
"exec_die": StatusExecDie,
|
||||
"exec_start": StatusExecStart,
|
||||
"export": StatusExport,
|
||||
"health_status": StatusHealthStatus,
|
||||
"kill": StatusKill,
|
||||
"oom": StatusOOM,
|
||||
"pause": StatusPause,
|
||||
"rename": StatusRename,
|
||||
"resize": StatusResize,
|
||||
"restart": StatusRestart,
|
||||
"start": StatusStart,
|
||||
"stop": StatusStop,
|
||||
"top": StatusTop,
|
||||
"unpause": StatusUnpause,
|
||||
"update": StatusUpdate,
|
||||
|
||||
}
|
||||
|
||||
// RecordedDeploymentStatuses lists the statuses that are kept as events.
// Deployment.handleEvents appends a DeploymentEvent only when the parsed
// status appears in this list; any other status is skipped.
var RecordedDeploymentStatuses = []DeploymentStatus{
|
||||
StatusCreate,
|
||||
StatusDestroy,
|
||||
StatusDie,
|
||||
StatusKill,
|
||||
StatusOOM,
|
||||
StatusRestart,
|
||||
StatusStart,
|
||||
StatusStop,
|
||||
|
||||
}
|
||||
|
||||
// ParseDeploymentStatus returns the DeploymentStatus registered for the given
// label in DeploymentStatusLabels. A label with no entry yields the map's
// zero value, which is StatusUnknown.
func ParseDeploymentStatus(status string) DeploymentStatus {
|
||||
return DeploymentStatusLabels[status]
|
||||
}
|
||||
|
||||
// DeploymentEvent records a single status change observed for a deployment.
type DeploymentEvent struct {
|
||||
// Time is a Unix timestamp in seconds; events created in this file set it
// from time.Now().Unix().
Time int64
|
||||
// Status is the kind of event that was recorded.
Status DeploymentStatus
|
||||
}
|
||||
|
||||
type Deployment struct {
|
||||
ID int
|
||||
|
||||
|
@ -30,6 +108,8 @@ type Deployment struct {
|
|||
Ports []int
|
||||
|
||||
Worker *Worker
|
||||
|
||||
Events []DeploymentEvent
|
||||
}
|
||||
|
||||
// replacementPort matches the literal text HOSTALGIA_PORT_ followed by one
// uppercase ASCII letter, capturing the whole match as group 1.
// NOTE(review): its use is outside this view - presumably a placeholder
// substituted while interpolating deployment files; confirm.
var replacementPort = regexp.MustCompile(`(HOSTALGIA_PORT_[A-Z])`)
|
||||
|
@ -117,6 +197,19 @@ func (d *Deployment) deploy() error {
|
|||
}
|
||||
} else if !fileInfo.IsDir() {
|
||||
return fmt.Errorf("invalid output directory: %s", d.Dir())
|
||||
} else {
|
||||
_, err = os.Stat(path.Join(d.Dir(), "docker-compose.yml"))
|
||||
if err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
log.Fatalf("failed to check for existing docker-compose.yml: %s", err)
|
||||
}
|
||||
} else {
|
||||
log.Printf("Stopping deployment %s...", d.Label())
|
||||
_, _, err = DockerCompose(d.Dir(), []string{"stop"})
|
||||
if err != nil {
|
||||
log.Printf("failed to stop running deployment: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err = d.interpolateAndCopy(path.Join(festoonPath, "docker-compose.yml"), path.Join(d.Dir(), "docker-compose.yml"))
|
||||
|
@ -158,17 +251,17 @@ func (d *Deployment) deploy() error {
|
|||
}
|
||||
}
|
||||
|
||||
stdOut, stdErr, err := DockerCompose(d.Dir(), []string{"up", "-d"})
|
||||
log.Printf("Starting deployment %s...", d.Label())
|
||||
_, stdErr, err := DockerCompose(d.Dir(), []string{"up", "-d"})
|
||||
if bytes.Contains(stdErr, []byte(fmt.Sprintf("%s is up-to-date", d.Label()))) {
|
||||
log.Printf("Warning: %s was already up", d.Label())
|
||||
d.Events = append(d.Events, DeploymentEvent{
|
||||
Time: time.Now().Unix(),
|
||||
Status: StatusAttach,
|
||||
})
|
||||
} else if err != nil {
|
||||
return fmt.Errorf("failed to bring deployment up: %s", err)
|
||||
}
|
||||
log.Printf("docker compose stdOut: %s", stdOut)
|
||||
log.Printf("docker compose stdErr: %s", stdErr)
|
||||
|
||||
log.Println("deployment UP!")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -212,7 +305,31 @@ func (d *Deployment) handleEvents() {
|
|||
if l == 10 {
|
||||
continue
|
||||
}
|
||||
log.Println("Container status", string(b[5:l-5]))
|
||||
|
||||
statusString := string(b[5 : l-5])
|
||||
|
||||
status := ParseDeploymentStatus(statusString)
|
||||
if status == StatusUnknown {
|
||||
log.Printf("Warning: Deployment %s has unknown status %s", d.Label(), statusString)
|
||||
}
|
||||
|
||||
var recordEvent bool
|
||||
for _, recordStatus := range RecordedDeploymentStatuses {
|
||||
if status == recordStatus {
|
||||
recordEvent = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !recordEvent {
|
||||
continue
|
||||
}
|
||||
|
||||
event := DeploymentEvent{
|
||||
Time: time.Now().Unix(),
|
||||
Status: status,
|
||||
}
|
||||
|
||||
d.Events = append(d.Events, event)
|
||||
}
|
||||
if scanner.Err() != nil {
|
||||
log.Fatal("scanner error", scanner.Err())
|
||||
|
|
|
@ -87,6 +87,8 @@ func (s *Server) sendTestingTask(c *Client) {
|
|||
t = NewTask(TaskDeploy, parameters)
|
||||
s.sendTask(c.Worker, t)
|
||||
|
||||
time.Sleep(time.Second * 10)
|
||||
|
||||
t = NewTask(TaskHealth, map[string]string{
|
||||
"time": fmt.Sprintf("%d", time.Now().UnixNano()),
|
||||
})
|
||||
|
|
5
util.go
5
util.go
|
@ -5,7 +5,6 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
@ -30,7 +29,7 @@ func Serialize(object interface{}, p string) error {
|
|||
|
||||
os.MkdirAll(path.Dir(p), 0)
|
||||
|
||||
err = ioutil.WriteFile(p, out, 0600)
|
||||
err = os.WriteFile(p, out, 0600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write to %s: %s", p, err)
|
||||
}
|
||||
|
@ -49,7 +48,7 @@ func Deserialize(object interface{}, path string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
configData, err := ioutil.ReadFile(path)
|
||||
configData, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read file: %s", err)
|
||||
}
|
||||
|
|
20
worker.go
20
worker.go
|
@ -7,6 +7,7 @@ import (
|
|||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Worker struct {
|
||||
|
@ -76,17 +77,15 @@ func (w *Worker) HandleRead(c *Client) {
|
|||
})
|
||||
|
||||
for _, d := range w.Deployments {
|
||||
stdOut, stdErr, err := Docker("", []string{"ps", "--filter", "name=" + d.Label(), "--format", "{{ .State }}"})
|
||||
eventsJson, err := json.Marshal(d.Events)
|
||||
if err != nil {
|
||||
log.Printf("failed to check status of deployment %d: %s", d.Label(), err)
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
if len(stdErr) > 0 {
|
||||
log.Printf("failed to check status of deployment %d: %s", d.Label(), stdErr)
|
||||
} else if len(stdOut) > 0 {
|
||||
// TODO validate stdout matches status
|
||||
result.Parameters[fmt.Sprintf("status_%d", d.ID)] = string(stdOut)
|
||||
}
|
||||
result.Parameters[fmt.Sprintf("events_%d", d.ID)] = string(eventsJson)
|
||||
|
||||
d.Events = d.Events[:0]
|
||||
// TODO deployment mutex
|
||||
}
|
||||
|
||||
resultJson, err := json.Marshal(result)
|
||||
|
@ -123,13 +122,14 @@ func (w *Worker) HandleRead(c *Client) {
|
|||
Worker: w,
|
||||
}
|
||||
|
||||
go d.handleEvents()
|
||||
time.Sleep(10 * time.Millisecond) // Give events handler some time to attach.
|
||||
|
||||
err = d.deploy()
|
||||
if err != nil {
|
||||
log.Fatalf("failed to deploy %+v: %s", d, err)
|
||||
}
|
||||
|
||||
go d.handleEvents()
|
||||
|
||||
w.Deployments = append(w.Deployments, d)
|
||||
|
||||
// Send result
|
||||
|
|
Loading…
Reference in New Issue