Browse Source

working on handling ansible module errors correctly

master
forest 2 years ago
parent
commit
4c12341b9b
  1. 2
      ReadMe.md
  2. 37
      automation/terraformActions.go
  3. 2
      automation/terraformCodeGeneration.go
  4. 2
      notes.txt

2
ReadMe.md

@ -5,7 +5,7 @@ server.garden Privileged Automation Agent
```
mkdir -p ssh
ssh-keygen -t ed25519 -N '' -f ./ssh/severgarden_builtin_ed22519
ssh-keygen -t ed25519 -N '' -f ./ssh/servergarden_builtin_ed22519
go build -o ansible-wrapper/ansible-playbook-wrapper ansible-wrapper/main.go
go build -o host-key-poller/host-key-poller host-key-poller/main.go

37
automation/terraformActions.go

@ -117,6 +117,7 @@ type AnsibleTaskResult struct {
type TerraformApplyResult struct {
Error error
Success bool
Log string
Complete bool
Status *SimplifiedTerraformStatus
@ -341,6 +342,7 @@ func monitorTerraformApplyProgress(
logLinesWithoutAnsiEscapes := []string{}
terraformIsRunning := true
terraformDirectory := filepath.Join(workingDirectory, terraformProject)
ansibleModuleLevelErrors := map[string]bool{}
// https://github.com/acarl005/stripansi/blob/master/stripansi.go
ansiEscapeRegex := regexp.MustCompile("[\u001B\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))")
@ -422,15 +424,16 @@ func monitorTerraformApplyProgress(
// line which has the module / resource name on it.
// So for example we could find a line:
//
// Error: Error running command './ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/severgarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml': exit status 4. Output:
// Error: Error running command './ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/servergarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml': exit status 4. Output:
//
// Then we look back in the log for any line which contains that command, and we find:
//
// module.ansible-threshold-server.null_resource.ansible_playbook[0] (local-exec): Executing: ["/bin/sh" "-c" "./ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/severgarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml"]
// module.ansible-threshold-server.null_resource.ansible_playbook[0] (local-exec): Executing: ["/bin/sh" "-c" "./ansible-playbook-wrapper --private-key '/usr/lib/rootsystem/ssh/servergarden_builtin_ed22519' -i '167.71.175.207,' -u root -e 'domain=greenhouseusers.com arch=amd64' playbook.yml"]
//
matches := terraformExecErrorRegex.FindStringSubmatch(lineWithoutAnsiEscapes)
if matches != nil {
commandThatFailed := matches[1]
log.Printf("terraformExecErrorRegex matched %s", lineWithoutAnsiEscapes)
log.Printf("terraformExecErrorRegex matched %s", commandThatFailed)
previousLineContainingCommandThatFailed := ""
for i, previousLine := range logLinesWithoutAnsiEscapes {
@ -447,7 +450,8 @@ func monitorTerraformApplyProgress(
for moduleName, module := range simpleStatus.Modules {
if strings.HasPrefix(address, moduleName) {
if module.IsAnsible {
log.Printf("setting ansible module %s resources to state error", moduleName)
log.Printf("setting ansible module %s resources to state error", module.DisplayName)
ansibleModuleLevelErrors[module.DisplayName] = true
for _, resource := range module.Resources {
resource.State = "error"
}
@ -483,17 +487,27 @@ func monitorTerraformApplyProgress(
if err == nil {
processedLogLines = append(processedLogLines, string(logBytes))
} else {
//fmt.Printf("ansible.log: %s\n", err)
fmt.Printf("error trying to read ansible log: %s\n", err)
}
// If the entire ansible playbook errored out (for example, it couldn't connect),
// then the status of the individual roles has already been set to error and we don't want to overwrite that,
// so skip this module and move on to the next one.
if ansibleModuleLevelErrors[module] {
continue
}
jsonBytes, err := ioutil.ReadFile(filepath.Join(terraformDirectory, "modules", module, "ansible-log.json"))
if err == nil {
var ansibleLog []AnsibleTaskResult
log.Printf("%s\n", string(jsonBytes))
err = json.Unmarshal(jsonBytes, &ansibleLog)
if err == nil {
module, has := simpleStatus.Modules[fmt.Sprintf("module.%s", module)]
if has {
ansibleRoles := map[string]int{}
//ansibleRolesErrors := map[string]int{}
for _, ansibleResult := range ansibleLog {
//if ansibleResult.
if ansibleResult.Role != "" && ansibleResult.Success {
ansibleRoles[ansibleResult.Role] += 1
}
@ -541,6 +555,12 @@ func monitorTerraformApplyProgress(
})()
err := applyProcess.Wait()
_, isExitError := err.(*exec.ExitError)
if err != nil && !isExitError {
err = errors.Wrap(err, "error waiting for terraform to finish running: ")
} else {
err = nil
}
// once upon a time I got an error
// panic: send on closed channel @ terraformActions.go:204 (logLinesChannel <- blahblah; in scanAllOutput())
@ -551,19 +571,20 @@ func monitorTerraformApplyProgress(
})()
terraformIsRunning = false
terraformSuccess := true
if err == nil && applyProcess.ProcessState.ExitCode() != 0 {
err = fmt.Errorf("terraform apply failed: exit code %d", applyProcess.ProcessState.ExitCode())
} else {
err = errors.Wrap(err, "terraform apply failed: ")
// If the apply fails, it's not really an exceptional case; we don't want to exit the application.
terraformSuccess = false
//err = errors.New(fmt.Sprintf("terraform apply failed: exit code %d", applyProcess.ProcessState.ExitCode()))
}
err = errors.WithStack(err)
log := joinAnsibleLogsAndUpdateAnsibleStatus()
outputChannel <- TerraformApplyResult{
Error: err,
Complete: true,
Success: terraformSuccess,
Log: log,
Status: simpleStatus,
}

2
automation/terraformCodeGeneration.go

@ -55,7 +55,7 @@ const ssh_private_key_filepath = "ssh_private_key_filepath"
const node_id = "node_id"
const node_arch = "node_arch"
const post_to_object_storage_shell_script = "post_to_object_storage_shell_script"
const ssh_private_key_filepath_value = "ssh/severgarden_builtin_ed22519"
const ssh_private_key_filepath_value = "ssh/servergarden_builtin_ed22519"
func WriteTerraformCodeForTargetedModules(
config *configuration.Configuration,

2
notes.txt

@ -4,7 +4,7 @@ TODO:
Clean up post_to_object_storage_shell_script. Make it a template rather than a variable?
forest@tower:~/Desktop/git/rootsystem/terraform-modules/ansible-threshold-server$ ansible-playbook --private-key '/home/forest/Desktop/git/rootsystem/ssh/severgarden_builtin_ed22519' -i '104.131.56.31,' -u root -e 'domain=server.garden arch=amd64' playbook.yml
forest@tower:~/Desktop/git/rootsystem/terraform-modules/ansible-threshold-server$ ansible-playbook --private-key '/home/forest/Desktop/git/rootsystem/ssh/servergarden_builtin_ed22519' -i '104.131.56.31,' -u root -e 'domain=server.garden arch=amd64' playbook.yml

Loading…
Cancel
Save