diff --git a/ReadMe.md b/ReadMe.md index f8e2638..3225757 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -5,7 +5,7 @@ server.garden Privileged Automation Agent ``` mkdir -p ssh -ssh-keygen -t ed25519 -N '' -f ./ssh/severgarden_builtin_ed22519 +ssh-keygen -t ed25519 -N '' -f ./ssh/servergarden_builtin_ed22519 go build -o ansible-wrapper/ansible-playbook-wrapper ansible-wrapper/main.go go build -o host-key-poller/host-key-poller host-key-poller/main.go diff --git a/automation/terraformActions.go b/automation/terraformActions.go index d9c0a44..96a1c98 100644 --- a/automation/terraformActions.go +++ b/automation/terraformActions.go @@ -117,6 +117,7 @@ type AnsibleTaskResult struct { type TerraformApplyResult struct { Error error + Success bool Log string Complete bool Status *SimplifiedTerraformStatus @@ -341,6 +342,7 @@ func monitorTerraformApplyProgress( logLinesWithoutAnsiEscapes := []string{} terraformIsRunning := true terraformDirectory := filepath.Join(workingDirectory, terraformProject) + ansibleModuleLevelErrors := map[string]bool{} // https://github.com/acarl005/stripansi/blob/master/stripansi.go ansiEscapeRegex := regexp.MustCompile("[\u001B\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))") @@ -422,15 +424,16 @@ func monitorTerraformApplyProgress( // line which has the module / resource name on it. // So for example we could find a line: // - // Error: Error running command './ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/severgarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml': exit status 4. Output: + // Error: Error running command './ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/servergarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml': exit status 4. 
Output: // // Then we look back in the log for any line which contains that command, and we find: // - // module.ansible-threshold-server.null_resource.ansible_playbook[0] (local-exec): Executing: ["/bin/sh" "-c" "./ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/severgarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml"] + // module.ansible-threshold-server.null_resource.ansible_playbook[0] (local-exec): Executing: ["/bin/sh" "-c" "./ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/servergarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml"] // matches := terraformExecErrorRegex.FindStringSubmatch(lineWithoutAnsiEscapes) if matches != nil { commandThatFailed := matches[1] + log.Printf("terraformExecErrorRegex matched %s", lineWithoutAnsiEscapes) log.Printf("terraformExecErrorRegex matched %s", commandThatFailed) previousLineContainingCommandThatFailed := "" for i, previousLine := range logLinesWithoutAnsiEscapes { @@ -447,7 +450,8 @@ func monitorTerraformApplyProgress( for moduleName, module := range simpleStatus.Modules { if strings.HasPrefix(address, moduleName) { if module.IsAnsible { - log.Printf("setting ansible module %s resources to state error", moduleName) + log.Printf("setting ansible module %s resources to state error", module.DisplayName) + ansibleModuleLevelErrors[module.DisplayName] = true for _, resource := range module.Resources { resource.State = "error" } @@ -483,17 +487,27 @@ func monitorTerraformApplyProgress( if err == nil { processedLogLines = append(processedLogLines, string(logBytes)) } else { - //fmt.Printf("ansible.log: %s\n", err) + fmt.Printf("error trying to read ansible log: %s\n", err) + } + + // if the entire ansible playbook errored out (like, it couldn't connect or something) + // then the status of the individual roles has already been set to error, we don't want to overwrite that. 
+ // so just skip this module + if ansibleModuleLevelErrors[module] { + continue } jsonBytes, err := ioutil.ReadFile(filepath.Join(terraformDirectory, "modules", module, "ansible-log.json")) if err == nil { var ansibleLog []AnsibleTaskResult + log.Printf("%s\n", string(jsonBytes)) err = json.Unmarshal(jsonBytes, &ansibleLog) if err == nil { module, has := simpleStatus.Modules[fmt.Sprintf("module.%s", module)] if has { ansibleRoles := map[string]int{} + //ansibleRolesErrors := map[string]int{} for _, ansibleResult := range ansibleLog { + //if ansibleResult. if ansibleResult.Role != "" && ansibleResult.Success { ansibleRoles[ansibleResult.Role] += 1 } @@ -541,6 +555,12 @@ func monitorTerraformApplyProgress( })() err := applyProcess.Wait() + _, isExitError := err.(*exec.ExitError) + if err != nil && !isExitError { + err = errors.Wrap(err, "error waiting for terraform to finish running: ") + } else { + err = nil + } // once upon a time I got an error // panic: send on closed channel @ terraformActions.go:204 (logLinesChannel <- blahblah; in scanAllOutput()) @@ -551,19 +571,20 @@ func monitorTerraformApplyProgress( })() terraformIsRunning = false + terraformSuccess := true if err == nil && applyProcess.ProcessState.ExitCode() != 0 { - err = fmt.Errorf("terraform apply failed: exit code %d", applyProcess.ProcessState.ExitCode()) - } else { - err = errors.Wrap(err, "terraform apply failed: ") + // If the apply fails, it's not really an exceptional case, so we don't want to exit the application. 
+ terraformSuccess = false + //err = errors.New(fmt.Sprintf("terraform apply failed: exit code %d", applyProcess.ProcessState.ExitCode())) } - err = errors.WithStack(err) log := joinAnsibleLogsAndUpdateAnsibleStatus() outputChannel <- TerraformApplyResult{ Error: err, Complete: true, + Success: terraformSuccess, Log: log, Status: simpleStatus, } diff --git a/automation/terraformCodeGeneration.go b/automation/terraformCodeGeneration.go index 96aae09..cd4fc9a 100644 --- a/automation/terraformCodeGeneration.go +++ b/automation/terraformCodeGeneration.go @@ -55,7 +55,7 @@ const ssh_private_key_filepath = "ssh_private_key_filepath" const node_id = "node_id" const node_arch = "node_arch" const post_to_object_storage_shell_script = "post_to_object_storage_shell_script" -const ssh_private_key_filepath_value = "ssh/severgarden_builtin_ed22519" +const ssh_private_key_filepath_value = "ssh/servergarden_builtin_ed22519" func WriteTerraformCodeForTargetedModules( config *configuration.Configuration, diff --git a/notes.txt b/notes.txt index 0ad3c5e..e5aea85 100644 --- a/notes.txt +++ b/notes.txt @@ -4,7 +4,7 @@ TODO: Clean up post_to_object_storage_shell_script. Make it a template rather than a variable? -forest@tower:~/Desktop/git/rootsystem/terraform-modules/ansible-threshold-server$ ansible-playbook --private-key '/home/forest/Desktop/git/rootsystem/ssh/severgarden_builtin_ed22519' -i '104.131.56.31,' -u root -e 'domain=server.garden arch=amd64' playbook.yml +forest@tower:~/Desktop/git/rootsystem/terraform-modules/ansible-threshold-server$ ansible-playbook --private-key '/home/forest/Desktop/git/rootsystem/ssh/servergarden_builtin_ed22519' -i '104.131.56.31,' -u root -e 'domain=server.garden arch=amd64' playbook.yml