server.garden privileged automation agent (mirror of https://git.sequentialread.com/forest/rootsystem)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

360 lines
11 KiB

package main
import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"

	errors "git.sequentialread.com/forest/pkg-errors"
	"git.sequentialread.com/forest/rootsystem/automation"
	"git.sequentialread.com/forest/rootsystem/configuration"
	"git.sequentialread.com/forest/rootsystem/objectStorage"
	"git.sequentialread.com/forest/rootsystem/pki"
)
// applicationState holds the process-wide dependencies shared by every
// automation task in this file.
type applicationState struct {
	// workingDirectory is resolved by configuration.LoadConfiguration in main.
	workingDirectory string
	// storage is the object storage client, initialized once in main and
	// read by the parallel tasks and the streaming helpers afterwards.
	storage objectStorage.ObjectStorager
}

// global is the single shared applicationState instance; it is fully
// populated in main before any goroutines that read it are started.
var global applicationState
// main is the rootsystem entry point. It loads configuration, connects to
// object storage, starts the localhost terraform state server, gathers build
// prerequisites in parallel (object storage access key, threshold TLS certs,
// SSH port assignment, build number), then runs the GLOBAL terraform build
// followed by this node's LOCAL terraform build and docker-compose. On a
// build failure it logs and idles (instead of crashing) so the operator can
// fix the build before the next run.
func main() {
	config, workingDirectory, err := configuration.LoadConfiguration()
	if err != nil {
		panic(fmt.Sprintf("%+v", errors.Wrap(err, "rootsystem can't start because loadConfiguration() returned")))
	}
	global.workingDirectory = workingDirectory

	storage, err := objectStorage.InitializeObjectStorage(config, true)
	if err != nil {
		panic(fmt.Sprintf("%+v", errors.Wrap(err, "rootsystem can't start because failed to initialize object storage")))
	}
	global.storage = storage

	// Serve terraform's remote state over localhost HTTP for the life of the process.
	go terraformStateServer()

	results := automation.DoInParallel(
		func() automation.TaskResult {
			// This creates an access key that the gateway cloud instance can use to upload
			// its SSH public key to our object storage. The host-key-poller will download
			// this SSH host public key and add it to our known_hosts so that we can SSH
			// to the gateway instance securely.
			hostKeysAccessSpec := objectStorage.ObjectStorageKey{
				Name:       "rootsystem-known-hosts",
				PathPrefix: "rootsystem/known-hosts",
				Read:       true,
				Write:      true,
				Delete:     false,
				List:       false,
			}
			knownHostsCredentials, err := global.storage.CreateAccessKeyIfNotExists(hostKeysAccessSpec)
			if err != nil {
				return automation.TaskResult{
					Name: "knownHostsCredentials",
					Err:  errors.Wrap(err, "can't create object storage access key for known_hosts"),
				}
			}
			return automation.TaskResult{
				Name:   "knownHostsCredentials",
				Result: knownHostsCredentials,
			}
		},
		func() automation.TaskResult {
			// BuildTLSCertsForThreshold fills in the CAs, Keys, and Certificates in the
			// Threshold ansible roles, so when terraform invokes ansible to install the
			// threshold client/server, it installs working certificates and keys.
			//
			// BUGFIX: declare a task-local err with := here. The original wrote to main's
			// outer `err` from inside this task while tasks run in parallel, sharing one
			// variable across goroutine boundaries.
			err := pki.BuildTLSCertsForThreshold(
				global.workingDirectory,
				config.Terraform.Variables["domain_name"],
				config.Host.Name,
				global.storage,
			)
			if err != nil {
				return automation.TaskResult{
					Name: "buildTLSCertsForThreshold",
					Err:  errors.Wrap(err, "can't create certs for threshold"),
				}
			}
			return automation.TaskResult{Name: "buildTLSCertsForThreshold"}
		},
		func() automation.TaskResult {
			// Determine this host's SSH port: reuse the previously recorded assignment if
			// one exists in object storage, otherwise claim the next free port and
			// advance the shared counter.
			sshPort := 2201
			hostSSHPortFilename := fmt.Sprintf("rootsystem/ssh/%s.txt", config.Host.Name)
			file, notFound, err := global.storage.Get(hostSSHPortFilename)
			if err != nil && !notFound {
				return automation.TaskResult{
					Name: "sshPort",
					Err:  errors.Wrapf(err, "can't download %s", hostSSHPortFilename),
				}
			}
			if !notFound {
				sshPort, err = strconv.Atoi(string(file.Content))
				if err != nil {
					return automation.TaskResult{
						Name: "sshPort",
						Err:  errors.Wrapf(err, "can't read %s as a number", hostSSHPortFilename),
					}
				}
			} else {
				file, notFound, err := global.storage.Get("rootsystem/ssh/next-port.txt")
				if err != nil && !notFound {
					return automation.TaskResult{
						Name: "sshPort",
						Err:  errors.Wrap(err, "can't download next-port.txt"),
					}
				}
				if !notFound {
					sshPort, err = strconv.Atoi(string(file.Content))
					if err != nil {
						// Corrupt counter file: fall back to the default rather than failing.
						sshPort = 2201
						log.Printf("warning: next-port.txt did not contain a number. defaulting to %d. contents: %s\n", sshPort, string(file.Content))
					}
				}
				// BUGFIX: the two error messages below said "can't can't upload".
				err = global.storage.Put(hostSSHPortFilename, []byte(strconv.Itoa(sshPort)))
				if err != nil {
					return automation.TaskResult{
						Name: "sshPort",
						Err:  errors.Wrapf(err, "can't upload %s", hostSSHPortFilename),
					}
				}
				err = global.storage.Put("rootsystem/ssh/next-port.txt", []byte(strconv.Itoa(sshPort+1)))
				if err != nil {
					return automation.TaskResult{
						Name: "sshPort",
						Err:  errors.Wrap(err, "can't upload next-port.txt"),
					}
				}
			}
			return automation.TaskResult{
				Name:   "sshPort",
				Result: sshPort,
			}
		},
		func() automation.TaskResult {
			// Add 1 to the build number; each time rootsystem runs is a different build.
			file, notFound, err := global.storage.Get("rootsystem/automation/build-number.txt")
			if err != nil && !notFound {
				return automation.TaskResult{
					Name: "buildNumber",
					Err:  errors.Wrap(err, "can't download build-number.txt"),
				}
			}
			buildNumber := 1
			if !notFound {
				n, err := strconv.Atoi(string(file.Content))
				if err != nil {
					log.Printf("warning: build-number.txt did not contain a number. defaulting to build number 1. contents: %s\n", string(file.Content))
				} else {
					buildNumber = n + 1
				}
			}
			err = global.storage.Put("rootsystem/automation/build-number.txt", []byte(strconv.Itoa(buildNumber)))
			if err != nil {
				// BUGFIX: message said "can't can't upload".
				return automation.TaskResult{
					Name: "buildNumber",
					Err:  errors.Wrap(err, "can't upload build-number.txt"),
				}
			}
			return automation.TaskResult{
				Name:   "buildNumber",
				Result: buildNumber,
			}
		},
	)

	for _, result := range results {
		if result.Err != nil {
			panic(fmt.Sprintf("can't start rootsystem because %s: %+v", result.Name, result.Err))
		}
	}
	//sshPort := results["sshPort"].Result.(int)
	buildNumber := results["buildNumber"].Result.(int)
	knownHostsCredentials := results["knownHostsCredentials"].Result.([]configuration.Credential)

	// First, run the terraform build for the GLOBAL components, meaning the components
	// that exist in the cloud, independent of how many server.garden nodes are being used.
	outputVariables, success, err := terraformBuild(
		config,
		automation.TerraformConfiguration{
			BuildNumber:                      buildNumber,
			TargetedModules:                  config.Terraform.GlobalModules,
			TerraformProject:                 configuration.GLOBAL_TERRAFORM_PROJECT,
			HostKeysObjectStorageCredentials: knownHostsCredentials,
		},
	)
	if err != nil {
		log.Printf("rootsystem %s build errored out (exception): %+v", configuration.GLOBAL_TERRAFORM_PROJECT, err)
		// Don't crash the app if the TF build failed, just sit there and do nothing.
		// The user has to do something to fix the build before we run again.
	} else if !success {
		log.Printf("rootsystem %s build failed", configuration.GLOBAL_TERRAFORM_PROJECT)
	} else {
		// Next, we run a separate LOCAL terraform build which is specific to THIS
		// server.garden node; this build is responsible for installing software on this
		// node & registering this node with the cloud resources.
		projectName := fmt.Sprintf("%s-%s", configuration.LOCAL_TERRAFORM_PROJECT, config.Host.Name)
		_, success, err = terraformBuild(
			config,
			automation.TerraformConfiguration{
				BuildNumber:          buildNumber,
				TargetedModules:      config.Terraform.LocalModules,
				TerraformProject:     projectName,
				RemoteState:          configuration.GLOBAL_TERRAFORM_PROJECT,
				RemoteStateVariables: outputVariables,
			},
		)
		if err != nil {
			log.Printf("rootsystem %s build errored out (exception): %+v", projectName, err)
		} else if !success {
			log.Printf("rootsystem %s build failed", projectName)
		} else {
			// ROBUSTNESS: the original silently discarded MkdirAll errors; log them so a
			// permissions problem is visible here instead of surfacing later as an
			// opaque docker-compose failure.
			for _, dir := range []string{
				"/var/run/servergarden/threshold/",
				"/var/run/servergarden/caddy/",
				"/var/lib/servergarden/caddy/data/",
			} {
				if err := os.MkdirAll(dir, 0o700); err != nil {
					log.Printf("can't create directory %s: %+v", dir, err)
				}
			}
			svg, statusChannel, err := automation.DockerComposeUp(config, workingDirectory)
			if err != nil {
				panic(fmt.Sprintf("%+v", errors.Wrap(err, "rootsystem can't start because DockerComposeUp() returned")))
			}
			fmt.Println("DockerComposeUp kicked off")
			diagramPath := fmt.Sprintf(
				"rootsystem/automation/%04d/docker-compose-%s/diagram.svg", buildNumber, config.Host.Name,
			)
			statusPath := fmt.Sprintf(
				"rootsystem/automation/%04d/docker-compose-%s/status.json", buildNumber, config.Host.Name,
			)
			success, err := streamUpdatesToObjectStorage(diagramPath, svg, statusPath, statusChannel)
			if err != nil {
				log.Printf("rootsystem docker-compose errored out (exception): %+v", err)
			} else if !success {
				log.Printf("rootsystem docker-compose failed")
			}
		}
	}

	// Sit and do nothing forever; remaining work happens in goroutines.
	select {}
}
// terraformBuild generates terraform code for the targeted modules, kicks off
// plan+apply, and streams the resulting architecture diagram and status
// updates into object storage under a per-build prefix. It returns the
// build's terraform output variables, whether the apply succeeded, and any
// error encountered along the way.
func terraformBuild(
	config *configuration.Configuration,
	terraformConfig automation.TerraformConfiguration,
) ([]string, bool, error) {
	outputs, err := automation.WriteTerraformCodeForTargetedModules(
		config,
		global.workingDirectory,
		terraformConfig,
	)
	if err != nil {
		return []string{}, false, err
	}
	fmt.Println("WriteTerraformCodeForTargetedModules done")

	diagramSVG, statuses, err := automation.TerraformPlanAndApply(config, global.workingDirectory, terraformConfig.TerraformProject)
	if err != nil {
		return []string{}, false, err
	}
	fmt.Println("TerraformPlanAndApply kicked off")

	// Both artifacts live under the same build-number/project prefix in object storage.
	prefix := fmt.Sprintf(
		"rootsystem/automation/%04d/%s",
		terraformConfig.BuildNumber, terraformConfig.TerraformProject,
	)
	success, err := streamUpdatesToObjectStorage(prefix+"/diagram.svg", diagramSVG, prefix+"/status.json", statuses)
	if err != nil {
		return outputs, false, err
	}
	return outputs, success, nil
}
// streamUpdatesToObjectStorage uploads the build diagram once, then consumes
// updates from statusChannel until one arrives with Complete set, mirroring
// each update as JSON to object storage and echoing newly-appended log output
// to the process log. It returns the completed update's Success flag and
// Error; if the channel closes before a Complete update, it returns an error.
func streamUpdatesToObjectStorage(
	diagramPath string,
	svg []byte,
	statusPath string,
	statusChannel chan automation.TerraformApplyResult,
) (bool, error) {
	if err := global.storage.Put(diagramPath, svg); err != nil {
		return false, err
	}
	previousLog := ""
	for update := range statusChannel {
		encoded, err := json.MarshalIndent(update, "", " ")
		if err != nil {
			return false, err
		}
		// Each update carries the full accumulated log; print only the new tail.
		freshOutput := strings.TrimPrefix(update.Log, previousLog)
		previousLog = update.Log
		log.Println(freshOutput)
		// Status uploads are best-effort: a failed upload shouldn't abort the stream.
		if err := global.storage.Put(statusPath, encoded); err != nil {
			log.Printf("can't upload terraform status update to object storage: %+v", err)
		}
		if update.Complete {
			return update.Success, update.Error
		}
	}
	return false, errors.New("streamUpdatesToObjectStorage: statusChannel closed before status was Complete")
}
// terraformStateServer serves terraform's HTTP remote-state backend. It
// blocks in ListenAndServe, so callers run it in a goroutine; the returned
// error is whatever ListenAndServe returns.
func terraformStateServer() error {
	// Make sure to only listen on localhost.
	// TODO change this to HTTPS or unix socket
	server := http.Server{
		Addr:    fmt.Sprintf("127.0.0.1:%d", configuration.TERRAFORM_STATE_SERVER_PORT_NUMBER),
		Handler: terraformStateHandler{},
		// Bound how long a client may take to send request headers so a stalled
		// connection can't pin a server goroutine forever. Only the header read
		// is bounded; state uploads/downloads of any duration still work.
		ReadHeaderTimeout: 10 * time.Second,
	}
	return server.ListenAndServe()
}