
378 lines
12 KiB
Raw Permalink Normal View History

2020-02-15 15:09:23 +00:00
package main
import (
2020-11-04 07:24:31 +00:00
2020-11-04 07:24:31 +00:00
errors ""
2020-02-15 15:09:23 +00:00
2020-02-24 19:20:16 +00:00
// applicationState holds process-wide state that main populates and that
// helpers such as terraformBuild read back through the package-level `global`.
type applicationState struct {
// workingDirectory is assigned in main from the second return value of
// configuration.LoadConfiguration.
workingDirectory string
// storage is the object storage handle (an objectStorage.ObjectStorager).
storage objectStorage.ObjectStorager
2020-02-24 19:20:16 +00:00
// global is the single package-level applicationState instance; main assigns
// its workingDirectory at startup and terraformBuild reads it.
var global applicationState
// main is the rootsystem entry point. In order, it:
//   1. loads configuration and initializes object storage,
//   2. starts terraformStateServer in a goroutine,
//   3. runs four setup tasks through automation.DoInParallel
//      (knownHostsCredentials, buildTLSCertsForThreshold, sshPort, buildNumber),
//   4. runs the GLOBAL terraform build and, if it succeeds, the host-LOCAL one,
//   5. on success brings up docker-compose and then calls
//      automation.IngressConfig in a loop.
//
// Failures in configuration loading, object storage initialization, any setup
// task, or DockerComposeUp panic. Terraform build failures and docker-compose
// streaming failures are only logged.
//
// NOTE(review): this listing appears to be missing lines (closing braces and
// several call expressions). The comments below describe only what is visible.
func main() {
config, workingDirectory, err := configuration.LoadConfiguration()
if err != nil {
panic(fmt.Sprintf("%+v", errors.Wrap(err, "rootsystem can't start because loadConfiguration() returned")))
// Publish the working directory so terraformBuild can read it from `global`.
global.workingDirectory = workingDirectory
storage, err := objectStorage.InitializeObjectStorage(config, true)
if err != nil {
panic(fmt.Sprintf("%+v", errors.Wrap(err, "rootsystem can't start because failed to initialize object storage")))
// NOTE(review): the assignment target on the next line is missing in this
// view; presumably it is global.storage — confirm against the original file.
} = storage
// Run the terraform state server in the background. Its returned error is
// discarded because it is launched with `go`.
go terraformStateServer()
// Each task below returns a named automation.TaskResult; the results are
// checked for errors and then looked up by name further down.
results := automation.DoInParallel(
func() automation.TaskResult {
// This creates an access key that the gateway cloud instance can use to upload its SSH public key
// to our object storage. the host-key-poller will download this SSH host public key and add it to our known_hosts
// so that we can SSH to the gateway instance securely
hostKeysAccessSpec := objectStorage.ObjectStorageKey{
Name: "rootsystem-known-hosts",
PathPrefix: "rootsystem/known-hosts",
Read: true,
Write: true,
Delete: false,
List: false,
// NOTE(review): the right-hand side of this assignment is missing in this view.
knownHostsCredentials, err :=
if err != nil {
return automation.TaskResult{
Name: "knownHostsCredentials",
Err: errors.Wrap(err, "can't create object storage access key for known_hosts"),
return automation.TaskResult{
Name: "knownHostsCredentials",
Result: knownHostsCredentials,
func() automation.TaskResult {
// BuildTLSCertsForThreshold fills in the CAs, Keys, and Certificates in the Threshold ansible roles.
// So when terraform invokes ansible to install threshold client/server, it will install working
// certificates and keys
// NOTE(review): the call's arguments are missing in this view.
err = pki.BuildTLSCertsForThreshold(
if err != nil {
return automation.TaskResult{
Name: "buildTLSCertsForThreshold",
Err: errors.Wrap(err, "can't create certs for threshold"),
2020-11-01 21:08:29 +00:00
// This task carries no Result; only its Err matters to the caller.
return automation.TaskResult{Name: "buildTLSCertsForThreshold"}
func() automation.TaskResult {
// Determine this host's SSH port. 2201 is the starting default; a
// host-specific file (named after config.Host.Name) takes precedence when
// it exists, otherwise the shared next-port.txt is consulted.
sshPort := 2201
hostSSHPortFilename := fmt.Sprintf("rootsystem/ssh/%s.txt", config.Host.Name)
// NOTE(review): the download call is missing from the next line in this view.
file, notFound, err :=
// A "not found" outcome is tolerated here; any other error aborts the task.
if err != nil && !notFound {
return automation.TaskResult{
Name: "sshPort",
Err: errors.Wrapf(err, "can't download %s", hostSSHPortFilename),
if !notFound {
// The host already has a port on record: it must parse as an integer.
sshPort, err = strconv.Atoi(string(file.Content))
if err != nil {
return automation.TaskResult{
Name: "sshPort",
Err: errors.Wrapf(err, "can't read %s as a number", hostSSHPortFilename),
} else {
// No port on record for this host: read the next-port.txt counter.
// NOTE(review): the callee name is missing from the next line in this view.
file, notFound, err :="rootsystem/ssh/next-port.txt")
if err != nil && !notFound {
return automation.TaskResult{
Name: "sshPort",
Err: errors.Wrap(err, "can't download next-port.txt"),
if !notFound {
sshPort, err = strconv.Atoi(string(file.Content))
if err != nil {
// Unparseable counter contents are not fatal: fall back to 2201 and warn.
sshPort = 2201
log.Printf("warning: next-port.txt did not contain a number. defaulting to %d. contents: %s\n", sshPort, string(file.Content))
// Store the chosen port as decimal text; per the error message below the
// destination is hostSSHPortFilename.
// NOTE(review): the upload callee and its first argument are missing in this view.
err =, []byte(strconv.Itoa(sshPort)))
if err != nil {
return automation.TaskResult{
Name: "sshPort",
Err: errors.Wrapf(err, "can't can't upload %s", hostSSHPortFilename),
// Advance the shared counter by writing sshPort+1 to next-port.txt.
// NOTE(review): the upload callee is missing from the next line in this view.
err ="rootsystem/ssh/next-port.txt", []byte(strconv.Itoa(sshPort+1)))
if err != nil {
return automation.TaskResult{
Name: "sshPort",
Err: errors.Wrap(err, "can't can't upload next-port.txt"),
return automation.TaskResult{
Name: "sshPort",
Result: sshPort,
func() automation.TaskResult {
// Add 1 to the build number, each time rootsystem runs is a different build.
2020-10-31 22:46:49 +00:00
// NOTE(review): the download callee is missing from the next line in this view.
file, notFound, err :="rootsystem/automation/build-number.txt")
if err != nil && !notFound {
return automation.TaskResult{
Name: "buildNumber",
Err: errors.Wrap(err, "can't download build-number.txt"),
// Build numbers start at 1 when no stored value exists or it cannot be parsed.
buildNumber := 1
if !notFound {
n, err := strconv.Atoi(string(file.Content))
if err != nil {
log.Printf("warning: build-number.txt did not contain a number. defaulting to build number 1. contents: %s\n", string(file.Content))
} else {
buildNumber = n + 1
2020-10-31 22:46:49 +00:00
// Write the new build number back to build-number.txt.
// NOTE(review): the upload callee is missing from the next line in this view.
err ="rootsystem/automation/build-number.txt", []byte(strconv.Itoa(buildNumber)))
if err != nil {
return automation.TaskResult{
Name: "buildNumber",
Err: errors.Wrap(err, "can't can't upload build-number.txt"),
return automation.TaskResult{
Name: "buildNumber",
Result: buildNumber,
// Any failed setup task is fatal.
for _, result := range results {
if result.Err != nil {
panic(fmt.Sprintf("can't start rootsystem because %s: %+v", result.Name, result.Err))
// Pull typed values out of the task results by name. These are unchecked type
// assertions, so they panic if a Result holds a different type.
//sshPort := results["sshPort"].Result.(int)
buildNumber := results["buildNumber"].Result.(int)
knownHostsCredentials := results["knownHostsCredentials"].Result.([]configuration.Credential)
// First, run the terraform build for the GLOBAL components, meaning the components
// that exist in the cloud, independent of how many nodes are being used.
// NOTE(review): the opening of the argument list (before the field list below)
// is missing in this view.
outputVariables, success, err := terraformBuild(
BuildNumber: buildNumber,
TargetedModules: config.Terraform.GlobalModules,
TerraformProject: configuration.GLOBAL_TERRAFORM_PROJECT,
HostKeysObjectStorageCredentials: knownHostsCredentials,
if err != nil {
log.Printf("rootsystem %s build errored out (exception): %+v", configuration.GLOBAL_TERRAFORM_PROJECT, err)
// Don't crash the app if the TF build failed, just sit there and do nothing. User has to do something to
// fix the build before we run again.
//panic(fmt.Sprintf("%+v", err))
} else if !success {
log.Printf("rootsystem %s build failed", configuration.GLOBAL_TERRAFORM_PROJECT)
} else {
// Next, we run a separate LOCAL terraform build which is specific to THIS node,
// this build will be responsible for installing software on this node & registering this node with the
// cloud resources
projectName := fmt.Sprintf("%s-%s", configuration.LOCAL_TERRAFORM_PROJECT, config.Host.Name)
// The local build is given the global project as RemoteState and the global
// build's output variables as RemoteStateVariables.
_, success, err = terraformBuild(
BuildNumber: buildNumber,
TargetedModules: config.Terraform.LocalModules,
TerraformProject: projectName,
RemoteState: configuration.GLOBAL_TERRAFORM_PROJECT,
RemoteStateVariables: outputVariables,
if err != nil {
log.Printf("rootsystem %s build errored out (exception): %+v", projectName, err)
//panic(fmt.Sprintf("%+v", err))
} else if !success {
log.Printf("rootsystem %s build failed", projectName)
} else {
2020-11-04 07:24:31 +00:00
// Create the socket and data directories with mode 0700 before starting
// docker-compose.
// NOTE(review): the os.MkdirAll return values are discarded.
os.MkdirAll(filepath.Dir(configuration.THRESHOLD_SOCKET), 0o700)
os.MkdirAll(filepath.Dir(configuration.CADDY_SOCKET), 0o700)
os.MkdirAll(configuration.CADDY_DATA, 0o700)
svg, statusChannel, err := automation.DockerComposeUp(config, workingDirectory)
if err != nil {
panic(fmt.Sprintf("%+v", errors.Wrap(err, "rootsystem can't start because DockerComposeUp() returned")))
// err = ioutil.WriteFile("docker.svg", svg, 0o777)
// if err != nil {
// panic(fmt.Sprintf("%+v", errors.Wrap(err, "rootsystem can't start because WriteFile(\"docker.svg\") returned")))
// }
fmt.Println("DockerComposeUp kicked off")
// Diagram and status paths are keyed by the zero-padded build number and the
// host name.
diagramPath := fmt.Sprintf(
2020-10-31 22:46:49 +00:00
"rootsystem/automation/%04d/docker-compose-%s/diagram.svg", buildNumber, config.Host.Name,
statusPath := fmt.Sprintf(
2020-10-31 22:46:49 +00:00
"rootsystem/automation/%04d/docker-compose-%s/status.json", buildNumber, config.Host.Name,
// Blocks until the status channel reports completion or is closed.
success, err := streamUpdatesToObjectStorage(diagramPath, svg, statusPath, statusChannel)
if err != nil {
log.Printf("rootsystem docker-compose errored out (exception): %+v", err)
} else if !success {
log.Printf("rootsystem docker-compose failed")
2020-11-04 07:24:31 +00:00
} else {
// Pause 5 seconds after docker-compose succeeds, then re-run IngressConfig
// every 30 seconds. No exit from this loop is visible; failures are logged
// and the call is repeated on the next pass.
time.Sleep(5 * time.Second)
2020-11-05 02:13:37 +00:00
for {
err = automation.IngressConfig(config)
if err != nil {
log.Printf("rootsystem IngressConfig failed: %+v", err)
} else {
log.Printf("rootsystem IngressConfig success")
time.Sleep(30 * time.Second)
2020-11-04 07:24:31 +00:00
2020-11-05 02:13:37 +00:00
// sit and do nothing forever.
// NOTE(review): only the channel creation is visible here; the blocking
// receive that would keep the process alive is not shown in this view.
a := make(chan bool)
// terraformBuild runs one terraform build for terraformConfig.TerraformProject.
//
// It writes the terraform code for the targeted modules, starts
// automation.TerraformPlanAndApply in global.workingDirectory, and hands the
// resulting diagram and status channel to streamUpdatesToObjectStorage.
//
// Returns:
//   - the output variables from WriteTerraformCodeForTargetedModules (an empty
//     slice if that step or TerraformPlanAndApply fails),
//   - whether the build reported success,
//   - the first error encountered, if any.
func terraformBuild(
config *configuration.Configuration,
terraformConfig automation.TerraformConfiguration,
) ([]string, bool, error) {
// NOTE(review): the call's arguments are missing in this view.
outputVariables, err := automation.WriteTerraformCodeForTargetedModules(
if err != nil {
return []string{}, false, err
fmt.Println("WriteTerraformCodeForTargetedModules done")
svg, statusChannel, err := automation.TerraformPlanAndApply(config, global.workingDirectory, terraformConfig.TerraformProject)
if err != nil {
return []string{}, false, err
fmt.Println("TerraformPlanAndApply kicked off")
// Both paths are formatted from the build number and the project name.
// NOTE(review): the format strings are missing from both Sprintf calls in
// this view.
diagramPath := fmt.Sprintf(
2020-10-31 22:46:49 +00:00
terraformConfig.BuildNumber, terraformConfig.TerraformProject,
statusPath := fmt.Sprintf(
2020-10-31 22:46:49 +00:00
terraformConfig.BuildNumber, terraformConfig.TerraformProject,
success, err := streamUpdatesToObjectStorage(diagramPath, svg, statusPath, statusChannel)
if err != nil {
// Unlike the earlier failure paths, the output variables are still returned here.
return outputVariables, false, err
return outputVariables, success, nil
// streamUpdatesToObjectStorage publishes a build's diagram once and then each
// status update received on statusChannel, until a status is marked Complete.
//
// Parameters:
//   - diagramPath / svg: destination path and bytes of the diagram.
//   - statusPath: destination path for the JSON-encoded status updates.
//   - statusChannel: source of automation.TerraformApplyResult updates.
//
// Returns (status.Success, status.Error) of the first Complete status. Returns
// (false, err) if the initial diagram step or JSON marshalling fails, or if the
// channel is closed before any Complete status arrives. A failed status upload
// is only logged and does not stop the loop.
func streamUpdatesToObjectStorage(
diagramPath string,
svg []byte,
statusPath string,
statusChannel chan automation.TerraformApplyResult,
) (bool, error) {
// NOTE(review): the callee and first argument are missing from the next line
// in this view; presumably this uploads svg to diagramPath — confirm.
err :=, svg)
if err != nil {
return false, err
// lastLog remembers the previous status.Log so the newly added suffix can be
// derived on each iteration.
lastLog := ""
for status := range statusChannel {
statusJson, err := json.MarshalIndent(status, "", " ")
if err != nil {
return false, err
// newLog is status.Log with the previously seen log removed from its front,
// when status.Log starts with it.
// NOTE(review): no use of newLog is visible in this view.
newLog := strings.TrimPrefix(status.Log, lastLog)
lastLog = status.Log
//log.Printf("len(newLog): %d\n", len(newLog))
// status1 := automation.TerraformApplyResult{
// Error: status.Error,
// Success: status.Success,
// Complete: status.Complete,
// Status: status.Status,
// }
// statusJson1, err := json.MarshalIndent(status1, "", " ")
// if err != nil {
// return []string{}, false, err
// }
// log.Println(string(statusJson1))
// NOTE(review): the callee and first argument are missing from the next line
// in this view; per the log message below this uploads the status JSON to
// object storage.
err =, statusJson)
if err != nil {
// Best effort: an upload failure is logged and the loop continues.
log.Printf("can't upload terraform status update to object storage: %+v", err)
if status.Complete {
return status.Success, status.Error
// The range loop only ends when the channel is closed; reaching this point
// means no Complete status was ever received.
return false, errors.New("streamUpdatesToObjectStorage: statusChannel closed before status was Complete")
2020-03-05 15:47:46 +00:00
// terraformStateServer builds an http.Server whose handler is
// terraformStateHandler{} and calls ListenAndServe on it, returning whatever
// error ListenAndServe returns. main launches it with `go`, so that error is
// not observed there.
func terraformStateServer() error {
// Make sure to only listen on localhost.
// TODO change this to HTTPS or unix socket
server := http.Server{
// NOTE(review): the Sprintf format string is empty in this view, so the port
// argument has no verb to consume. Given the comment above, the original
// presumably formats a localhost address with the port — confirm against the
// original file.
Addr: fmt.Sprintf("", configuration.TERRAFORM_STATE_SERVER_PORT_NUMBER),
Handler: terraformStateHandler{},
return server.ListenAndServe()