From b65cbbeb8ce2e9d1e96713f5f4a405518bb1a860 Mon Sep 17 00:00:00 2001
From: John-Paul Robinson <jpr@uab.edu>
Date: Wed, 11 Dec 2019 11:17:48 -0600
Subject: [PATCH] Fix provisioner race condition in null_resource

Collapse compute node and ood ops provisioners into a single
null_resource to avoid a race condition when the ansible tasks
run in parallel.  This led to inconsistent deploys where
the slurm configuration was not working after deploy.

Putting the provisioners in a single resource ensures they run in
series.  This comes at the cost of a loss of deploy flexibility.
Now ood and compute nodes ops steps will always run on changes rather
than selectively.  Since this feature wasn't fully functional, its
loss is not significant.
---
 main.tf | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/main.tf b/main.tf
index 4156d5b..c95d2c2 100644
--- a/main.tf
+++ b/main.tf
@@ -112,8 +112,10 @@ output "ood-ssh_host" {
     value = "${module.create-ood-instance.ssh_host}"
 }
 
-# compute node post provision
-resource "null_resource" "compute_ops" {
+# compute node and ood post provision
+# use single null_resource for serial provisioner runs to avoid race conditions
+# that lead to inconsistent deploy successes.
+resource "null_resource" "ops" {
   triggers = {
     ohpc_instance = module.create-ohpc-instance.id
     compute_instances = join(",", module.nodes.id)
@@ -137,28 +139,8 @@ resource "null_resource" "compute_ops" {
       for node, net in module.nodes.network:
     "ansible-playbook -c local -i /CRI_XCBC/hosts -l `hostname -s` -e \"{'compute_nodes':[{'name':'${node}', 'ip':'${net[0].fixed_ip_v4}', 'mac':'${net[0].mac}', 'vnfs':'', 'sockets':'1', 'corespersocket':'1'}]}\" /CRI_XCBC/site-ops.yaml -b -v"]
   }
-}
-
-# ood node post provision
-resource "null_resource" "ood_ops" {
-  triggers = {
-    ohpc_instance = module.create-ohpc-instance.id
-  }
-
-  connection {
-    host        = module.create-ohpc-instance.ssh_host
-    user        = var.ohpc_user
-    private_key = file(var.ssh_private_key)
-  }
-
-  # moves CRI_XCBC file into directory made above
-  provisioner "file" {
-    source      = "CRI_XCBC"
-    destination = "/"
-  }
-
 
-# ood node
+  # ood node
   provisioner "remote-exec" {
     inline = [
       for net in module.create-ood-instance.network:
-- 
GitLab