Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • dwheel7/hpc-factory
  • rc/hpc-factory
  • louistw/hpc-factory
  • jpr/hpc-factory
  • krish94/hpc-factory
  • atlurie/hpc-factory
6 results
Show changes
Commits on Source (249)
Showing
with 844 additions and 473 deletions
......@@ -8,13 +8,12 @@ variables:
ANSIBLE_REMOTE_TMP: "/tmp"
AWS_DEFAULT_REGION: "bhm"
AWS_HOST: "s3.lts.rc.uab.edu"
FF_SCRIPT_SECTIONS: "true"
OS_AUTH_TYPE: "v3applicationcredential"
OS_AUTH_URL: "https://keystone.cloud.rc.uab.edu:5000/v3"
OS_IDENTITY_API_VERSION: "3"
OS_INTERFACE: "public"
OS_REGION_NAME: "bhm1"
OOD_INSTANCE_NETWORK: "knightly-network"
PROXY_NETWORK: "proxy-net"
PKR_VAR_flavor: "m1.medium-ruffner"
PKR_VAR_source_image: "CentOS-7-x86_64-GenericCloud-2009"
PKR_VAR_floating_ip_network: "uab-campus"
......@@ -22,26 +21,20 @@ variables:
PKR_VAR_skip_create_image: "false"
PKR_VAR_ssh_username: "centos"
PKR_VAR_networks: '["8cf2f12e-905d-46d9-bc70-b0897c65f75a"]'
PKR_VAR_image_membership: '["cf6fa1e53d4c40a49f4e0e469c440359"]'
GIT_AUTHOR_NAME: "Gitlab runner"
GIT_AUTHOR_EMAIL: "gitlab@runner"
NUM_SERVER_TO_KEEP: 1
NUM_IMAGE_TO_KEEP: 30
TIMESTAMP_REGEXP: '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{6}'
PKR_VAR_root_ssh_key: "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBAFqqWgmYpEaGtHBeTu27ntVJpYjwq/x5aBefrvfhk8Z9lE3cuZ26vJ9n/9tGE4Zn2Pew1mpZgi6PzfJ3vMt8yA= root@master"
DEV_KEY: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCpncAcYosVHt7HsUcE2XOYDuCi4HQnmFJv279LOcpZgXtZ6o0BM1fe5FgJS0X1ohBXQUFRuYJuJSW/GSmC1K8T+wCrKjZLJdMbqrubHV27diUZfdoVkoJy1vcAQF5nEcoTC7MpAFbBomdn2rsrpgQe8DGiURV7+soqybXV1OsIR3FFf6npnUaskHYT/oVtG9eBOnscyBxoVgbxzlmyoBLXED/sHKFw4nQSF/glYKEFiDu6TRTsBBEGvv23Qo/66QpQiFJ6TNfApNiyY9L1X+Dy8EWU6lozmNgwGDjXQ70Lr6xHnA0QGVALJlHXa6QjpgtpC5Nefsdvtf1hpfFo2VutpbSB+aq9jk3gWNN+XkhrWN5PiwP7YYJNw/WozyfL+IhwjfHZGxkuws+wGR6ZKxlX9W9Vrsq9ncYNKuhy2SdsR6s2XECQtrEQ6ZlX5jRt6Yh5M9ls5fMsWEqknDPmr1Ui6wV7NxprYngo9fLSdYO/ETIO3S6PB0aEHOZOyGitGaM06EmNpvjQn/QkkaVgt/O8wKL1o1AVzXhDMAFvtG6ejppV6kuTUHXFgSGZF6N9fnP91HuytyzC09F+NMWcmnRdrgXlHapjuuL3zzi+XLCQvk8+aYTzBKx1nU2FPMDRZ9sInGmqdTuM002E7qVbaCy4OxcWaAS/L2UVhGnHr+egYw== louistw@uab.edu"
INSTANCE_FLAVOR: "m1.medium-ruffner"
HTTP_PROXY_INSTANCE_NAME: "http-proxy"
SSH_PROXY_INSTANCE_NAME: "ssh-proxy"
# Pipeline stages, listed in the order GitLab runs them.
stages:
  - pre-build
  - build
  - test
  - deploy
  - cleanup
# Pipeline-level gate: a pipeline is created only when one of these rules
# matches. Rules are evaluated top to bottom and the first match wins, so each
# pipeline source is listed exactly once (the second `schedule` entry could
# never be reached and has been dropped).
workflow:
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - if: $CI_PIPELINE_SOURCE == "schedule"
    - if: $CI_PIPELINE_SOURCE == "web"
# Reusable script step: exports BUILD_DATE as a timestamp taken in the
# America/Chicago time zone, formatted as YYYY-MM-DDTHHMMSS.
.get_build_date: &get_build_date
  - 'export BUILD_DATE=$(TZ=America/Chicago date +%Y-%m-%dT%H%M%S)'
......@@ -50,478 +43,386 @@ workflow:
# Reusable script steps: clone the external Ansible repository if it is not
# already present, create an `integration` branch with the source branch merged
# into the target branch, and append the resulting short SHAs to
# $CI_PROJECT_DIR/image.env.
# NOTE(review): this span carries two variants of the same steps side by side:
# hard-coded CRI_XCBC / uab-prod / upstream dev commands and their
# EXT_REPO_DIR / EXT_PR_* parameterized counterparts. It looks like both sides
# of the rendered diff; as written the first shell `if` has no matching `fi`.
# Confirm which variant applies before reusing these steps.
.update_ansible_repo: &update_ansible_repo
- *get_build_date
- |
if [ ! -d $CI_PROJECT_DIR/CRI_XCBC ]; then
git clone https://github.com/uabrc/CRI_XCBC.git
cd CRI_XCBC
git remote add upstream https://github.com/jprorama/CRI_XCBC.git
export EXT_REPO_DIR=$(basename -s .git $EXT_PR_TARGET_REPO)
if [ ! -d $CI_PROJECT_DIR/$EXT_REPO_DIR ]; then
git clone ${EXT_PR_TARGET_REPO} ${EXT_REPO_DIR}
cd ${EXT_REPO_DIR}
git remote add upstream ${EXT_PR_SRC_REPO}
cd ..
fi
- cd CRI_XCBC
- cd ${EXT_REPO_DIR}
- git config user.name "${GIT_AUTHOR_NAME}"
- git config user.email "${GIT_AUTHOR_EMAIL}"
- git fetch origin uab-prod
- git fetch upstream dev
- git checkout uab-prod
- git merge origin/uab-prod
- git checkout ${EXT_PR_TARGET_BRANCH}
- git fetch origin ${EXT_PR_TARGET_BRANCH}
- git merge origin/${EXT_PR_TARGET_BRANCH}
- git checkout -b integration
- git merge upstream/dev
- export CRI_XCBC_HEAD=$(git rev-parse --short HEAD)
- export CRI_XCBC_dev=$(git rev-parse --short upstream/dev)
- export CRI_XCBC_prod=$(git rev-parse --short origin/uab-prod)
- git fetch upstream ${EXT_PR_SRC_BRANCH}
- git merge upstream/${EXT_PR_SRC_BRANCH}
# export vars into job artifacts
- export EXT_REPO_HEAD=$(git rev-parse --short HEAD)
- export EXT_PR_SRC_BRANCH_SHA=$(git rev-parse --short upstream/${EXT_PR_SRC_BRANCH})
- export EXT_PR_TARGET_BRANCH_SHA=$(git rev-parse --short origin/${EXT_PR_TARGET_BRANCH})
# Back in the pipeline project's checkout: the SHA recorded next is this
# repository's HEAD, not the external repo's.
- cd ..
- export PACKER_IMAGE_HEAD=$(git rev-parse --short HEAD)
- echo CRI_XCBC_HEAD=${CRI_XCBC_HEAD} | tee -a $CI_PROJECT_DIR/image.env
- echo CRI_XCBC_dev=${CRI_XCBC_dev} | tee -a $CI_PROJECT_DIR/image.env
- echo CRI_XCBC_prod=${CRI_XCBC_prod} | tee -a $CI_PROJECT_DIR/image.env
- echo EXT_REPO_HEAD=${EXT_REPO_HEAD} | tee -a $CI_PROJECT_DIR/image.env
- echo EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} | tee -a $CI_PROJECT_DIR/image.env
- echo EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} | tee -a $CI_PROJECT_DIR/image.env
- echo PACKER_IMAGE_HEAD=${PACKER_IMAGE_HEAD} | tee -a $CI_PROJECT_DIR/image.env
# Reusable script step: recursively download the contents of the
# cheaha-cloud-ansible-files bucket into ansible/files/, overwriting local
# copies (--force).
.get_ansible_files: &get_ansible_files
  - >-
    s3cmd get --force -r
    --host=$AWS_HOST --host-bucket=$AWS_HOST
    s3://cheaha-cloud-ansible-files/ ansible/files/
# Builds the project's tooling image with Docker-in-Docker, smoke-tests the
# tools it is expected to ship, and pushes it to the project registry under
# both a BUILD_DATE tag and `latest`.
# Runs only for merge request pipelines that change the Dockerfile; a failure
# here does not fail the pipeline.
build_docker_image:
  image: docker:20.10.17
  stage: pre-build
  services:
    - docker:20.10.16-dind
  tags:
    - dind
  before_script:
    - *get_build_date
    # Feed the password on stdin so it never appears in the process argument
    # list or in the shell trace.
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
  script:
    - docker build -t $CI_REGISTRY_IMAGE:$BUILD_DATE -t $CI_REGISTRY_IMAGE:latest .
    # Smoke test: every tool must report its version, otherwise the image is
    # not pushed.
    - >
      docker run --rm $CI_REGISTRY_IMAGE bash -c
      'ansible --version &&
      openstack --version &&
      packer version &&
      s3cmd --version &&
      terraform --version'
    - docker push --all-tags $CI_REGISTRY_IMAGE
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
      changes:
        - Dockerfile
      allow_failure: true
# build_base_image / .build_proxy_image_template
# Visible steps: exit early when BUILT_BASE_IMAGE_ID is already set, refresh the
# Ansible repo and files, derive the PKR_VAR_* values for this run, optionally
# flip ${PROXY_ENABLE_VAR} to true in group_vars/all, inject S3/LTS settings
# into group_vars/all with sed, run packer init/validate/build, append the
# built image ID to image.env, and tag the image with the repo state.
# NOTE(review): base-image lines (openstack template, base_build.log,
# BUILT_BASE_IMAGE_ID) and proxy-image lines (openstack-proxy, proxy_build.log,
# BUILT_PROXY_IMAGE_ID) are interleaved here; this looks like both sides of the
# rendered diff. Where a variable is exported twice, the later export wins.
build_base_image:
stage: build
tags:
- build
.build_proxy_image_template: &build_proxy_image_template
script:
- |
if [ -n "${BUILT_BASE_IMAGE_ID}" ]; then
exit 0
fi
- *update_ansible_repo
- *get_ansible_files
- export REPO_HEAD=$(git rev-parse --short HEAD)
- export PKR_VAR_flavor="${BASE_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- export PKR_VAR_build_instance_name="base-${REPO_HEAD}"
# packer vars for job env
- export PKR_VAR_flavor="${PROXY_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- export PKR_VAR_build_instance_name="${BUILD_TARGET}-${EXT_REPO_HEAD}"
- export PKR_VAR_image_date_suffix=false
- |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
export PKR_VAR_image_name="base-PR-${CI_MERGE_REQUEST_IID}"
export PKR_VAR_image_name="${BUILD_TARGET}-PR-${CI_MERGE_REQUEST_IID}"
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
export PKR_VAR_image_name="base-${BUILD_DATE}"
export PKR_VAR_image_name="${BUILD_TARGET}-${BUILD_TAG:-${BUILD_DATE}}"
fi
# Ansible var overrides
- |
if [ -n "${PROXY_ENABLE_VAR}" ]; then
sed -i -E "s/(${PROXY_ENABLE_VAR}: ).*/\1true/" $EXT_REPO_DIR/group_vars/all
fi
- packer init openstack
- packer validate openstack
- packer build -machine-readable openstack | tee base_build.log
- export BUILT_BASE_IMAGE_ID=$(grep 'Image:' base_build.log | awk '{print $4}')
- echo BUILT_BASE_IMAGE_ID=${BUILT_BASE_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env
- openstack image unset --property signature_verified $BUILT_BASE_IMAGE_ID
- 'sed -i -E "s|(s3_endpoint: ).*|\1\"${S3_ENDPOINT}\"|" $EXT_REPO_DIR/group_vars/all'
- 'sed -i -E "s/(lts_access_key: ).*/\1\"${AWS_ACCESS_KEY_ID}\"/" $EXT_REPO_DIR/group_vars/all'
- 'sed -i -E "s/(lts_secret_key: ).*/\1\"${AWS_SECRET_ACCESS_KEY}\"/" $EXT_REPO_DIR/group_vars/all'
- 'sed -i -E "s/(s3_shibboleth_bucket_name: ).*/\1\"${S3_SHIBBOLETH_BUCKET_NAME}\"/" $EXT_REPO_DIR/group_vars/all'
- 'sed -i -E "s/(s3_shibboleth_object_name: ).*/\1\"${S3_SHIBBOLETH_OBJECT_NAME}\"/" $EXT_REPO_DIR/group_vars/all'
- 'sed -i -E "s|(ssh_pub_key: ).*|\1\"{{ lookup(''file'', ''${SSH_PUB_KEY}'') }}\"|" $EXT_REPO_DIR/group_vars/all'
# packer commands
- packer init openstack-proxy
- packer validate openstack-proxy
- packer build -machine-readable openstack-proxy | tee proxy_build.log
- export BUILT_PROXY_IMAGE_ID=$(grep 'Image:' proxy_build.log | awk '{print $4}')
- echo BUILT_PROXY_IMAGE_ID=${BUILT_PROXY_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env
# set image properties with repo state
- openstack image set --property EXT_PR_SRC_REPO=${EXT_PR_SRC_REPO} --property EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} --property EXT_PR_TARGET_REPO=${EXT_PR_TARGET_REPO} --property EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} --property PACKER_IMAGE_HEAD=${PACKER_IMAGE_HEAD} ${BUILT_PROXY_IMAGE_ID}
# image.env is published as a dotenv report so the variables appended above are
# handed to later jobs.
artifacts:
reports:
dotenv: image.env
expire_in: 30 days
# build_compute_image / build_http_proxy_image (build stage, `build` runners)
# Visible script: refresh the Ansible repo and files, use BUILT_BASE_IMAGE_ID as
# the packer source image, derive PKR_VAR_* values, and run packer
# init/validate/build against the openstack-compute template.
# The variables/merge lines set PROXY_ENABLE_VAR to "enable_http_proxy" and pull
# in build_proxy_image_template; the rule runs the job when PIPELINE_TARGET is
# "build" and BUILD_TARGET is "http-proxy".
# NOTE(review): two job keys and two bodies are interleaved; this looks like
# both sides of the rendered diff. Confirm which lines belong to which job.
build_compute_image:
build_http_proxy_image:
stage: build
needs: [build_base_image]
environment:
name: build
tags:
- build
script:
- *update_ansible_repo
- *get_ansible_files
- export PKR_VAR_source_image=${BUILT_BASE_IMAGE_ID}
- export REPO_HEAD=$(git rev-parse --short HEAD)
- export PKR_VAR_flavor="${COMPUTE_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- export PKR_VAR_build_instance_name="compute-${REPO_HEAD}"
- export PKR_VAR_image_date_suffix=false
- |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
export PKR_VAR_image_name="compute-PR-${CI_MERGE_REQUEST_IID}"
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
export PKR_VAR_image_name="compute-${BUILD_DATE}"
fi
- packer init openstack-compute
- packer validate openstack-compute
- packer build -machine-readable openstack-compute | tee compute_build.log
variables:
PROXY_ENABLE_VAR: "enable_http_proxy"
<<: *build_proxy_image_template
rules:
- if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "http-proxy"
when: always
# build_gpu_image / build_ssh_proxy_image (build stage, `build` runners)
# Sets PROXY_ENABLE_VAR to "enable_ssh_proxy" and merges in
# build_proxy_image_template; the rule runs the job when PIPELINE_TARGET is
# "build" and BUILD_TARGET is "ssh-proxy".
# NOTE(review): two job keys share this body and `needs: [build_base_image]`
# sits next to the proxy template merge; this looks like both sides of the
# rendered diff. Confirm before reuse.
build_gpu_image:
build_ssh_proxy_image:
stage: build
needs: [build_base_image]
environment:
name: build
tags:
- build
variables:
PROXY_ENABLE_VAR: "enable_ssh_proxy"
<<: *build_proxy_image_template
rules:
- if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "ssh-proxy"
when: always
# Script template for image builds. Two flows are visible:
#  - a GPU flow that deletes the "gpu1-placeholder" server, runs packer against
#    openstack-gpu, recreates the placeholder on cicd-net, and exits 1 if the
#    packer build failed (tracked through FAILED);
#  - a login flow that runs packer against openstack-login, appends
#    BUILT_LOGIN_IMAGE_ID to image.env, and tags the image with the repo state.
# NOTE(review): the two flows are interleaved line by line and the trailing
# `if [ "$FAILED" = true ]` block contains an `export` after `exit 1`; this
# looks like both sides of the rendered diff. Confirm which lines are current.
.build_login_image_template: &build_login_image_template
script:
- *update_ansible_repo
- *get_ansible_files
- FAILED=false
- export GPU_PLACEHOLDER_NAME="gpu1-placeholder"
- export GPU_PLACEHOLDER_FLAVOR="gpu1.medium"
- export GPU_PLACEHOLDER_IMAGE="CentOS-7-x86_64-GenericCloud-2009"
- export PKR_VAR_source_image=${BUILT_BASE_IMAGE_ID}
- export REPO_HEAD=$(git rev-parse --short HEAD)
- export PKR_VAR_flavor="${GPU_BUILD_FLAVOR:-gpu1.medium}"
- export PKR_VAR_build_instance_name="gpu-${REPO_HEAD}"
# packer vars for job env
- export PKR_VAR_flavor="${PROXY_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- export PKR_VAR_build_instance_name="${BUILD_TARGET}-${EXT_REPO_HEAD}"
- export PKR_VAR_image_date_suffix=false
- |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
export PKR_VAR_image_name="gpu-PR-${CI_MERGE_REQUEST_IID}"
export PKR_VAR_image_name="${BUILD_TARGET}-PR-${CI_MERGE_REQUEST_IID}"
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
export PKR_VAR_image_name="gpu-${BUILD_DATE}"
fi
- packer init openstack-gpu
- packer validate openstack-gpu
- openstack server delete --wait $GPU_PLACEHOLDER_NAME
- packer build -machine-readable openstack-gpu | tee gpu_build.log || FAILED=true
- openstack server create --image $GPU_PLACEHOLDER_IMAGE --network cicd-net --flavor $GPU_PLACEHOLDER_FLAVOR $GPU_PLACEHOLDER_NAME
- |
if [ "$FAILED" = true ]; then
exit 1
export PKR_VAR_image_name="${BUILD_TARGET}-${BUILD_TAG:-${BUILD_DATE}}"
fi
# packer commands
- packer init openstack-login
- packer validate openstack-login
- packer build -machine-readable openstack-login | tee login_build.log
- export BUILT_LOGIN_IMAGE_ID=$(grep 'Image:' login_build.log | awk '{print $4}')
- echo BUILT_LOGIN_IMAGE_ID=${BUILT_LOGIN_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env
# set image properties with repo state
- openstack image set --property EXT_PR_SRC_REPO=${EXT_PR_SRC_REPO} --property EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} --property EXT_PR_TARGET_REPO=${EXT_PR_TARGET_REPO} --property EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} --property PACKER_IMAGE_HEAD=${CI_COMMIT_SHORT_SHA} ${BUILT_LOGIN_IMAGE_ID}
artifacts:
reports:
dotenv: image.env
# Build-stage job that takes its script and artifacts from
# build_login_image_template.
# NOTE(review): the rules list mixes a SKIP_GPU_BUILD guard followed by an
# unconditional `when: always` with a PIPELINE_TARGET/BUILD_TARGET == "login"
# rule. As written the unconditional rule matches first, so the login rule is
# never consulted; this looks like both sides of the rendered diff.
build_login_image:
stage: build
environment:
name: build
tags:
- build
<<: *build_login_image_template
rules:
- if: $SKIP_GPU_BUILD == "true"
when: never
- when: always
- if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "login"
when: always
# Builds the OOD image with packer (openstack-ood template).
# Visible steps: refresh the Ansible repo and files, download a group_vars file
# from project 2836 via the GitLab API and patch credentials into it with sed,
# derive PKR_VAR_* values, run packer init/validate/build, append
# BUILT_OOD_IMAGE_ID to image.env, and tag the image with the repo state.
# The rule runs the job when PIPELINE_TARGET is "build" and BUILD_TARGET is "ood".
# NOTE(review): a fixed "knightly" flow (CRI_XCBC/group_vars/knightly, floating
# IP allocation, INSTANCE_FLAVOR/OOD_INSTANCE_NAME written to image.env) and an
# $ENV-driven flow (CRI_XCBC/group_vars/$ENV) are interleaved, and packer
# init/validate/build appear twice; this looks like both sides of the rendered
# diff. Confirm which lines are current.
build_ood_image:
stage: build
environment:
name: build
tags:
- build
script:
- *update_ansible_repo
- *get_ansible_files
- >
curl --header "PRIVATE-TOKEN: ${ANSIBLE_VAR_TOKEN}"
"${CI_API_V4_URL}/projects/2836/repository/files/knightly/raw?ref=main"
-o CRI_XCBC/group_vars/knightly
- 'sed -i -E "s/(lts_access_key: ).*/\1\"${AWS_ACCESS_KEY_ID}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s/(lts_secret_key: ).*/\1\"${AWS_SECRET_ACCESS_KEY}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s/(user_register_app_key: ).*/\1\"${SELF_REG_APP_KEY}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s/(celery_user_password: ).*/\1\"${CELERY_PASSWD}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s|(ssh_pub_key: ).*|\1\"{{ lookup(''file'', ''${SSH_PUB_KEY}'') }}\"|" CRI_XCBC/group_vars/knightly'
# packer vars for job env
- export PKR_VAR_flavor="${OOD_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- packer init openstack-ood
- packer validate openstack-ood
- export PKR_VAR_build_instance_name="${BUILD_TARGET}-${EXT_REPO_HEAD}"
- export PKR_VAR_image_date_suffix=false
- export PKR_VAR_image_name="${BUILD_TARGET}-${BUILD_TAG:-${BUILD_DATE}}"
- |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
export PKR_VAR_image_name="ood-PR-${CI_MERGE_REQUEST_IID}"
echo INSTANCE_FLAVOR="${PKR_VAR_flavor}" | tee -a $CI_PROJECT_DIR/image.env
echo OOD_INSTANCE_NAME="ood-PR-${CI_MERGE_REQUEST_IID}" | tee -a $CI_PROJECT_DIR/image.env
export FLOATING_IP=$(openstack floating ip create uab-campus -f value -c floating_ip_address)
echo FLOATING_IP=$FLOATING_IP | tee -a $CI_PROJECT_DIR/image.env
sed -i -E "s/(ood_servername: ).*/\1\"$CI_COMMIT_REF_SLUG.$FLOATING_IP.nip.io\"/" CRI_XCBC/group_vars/knightly
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
export PKR_VAR_image_name="ood-${BUILD_DATE}"
echo INSTANCE_FLAVOR="${OOD_INSTANCE_FLAVOR:-cpu16-64g}" | tee -a $CI_PROJECT_DIR/image.env
echo OOD_INSTANCE_NAME="ood-knightly" | tee -a $CI_PROJECT_DIR/image.env
echo FLOATING_IP=$TEST_IP | tee -a $CI_PROJECT_DIR/image.env
if [ $ENV = 'knightly' ] || [ $ENV = 'prod' ]; then
curl --header "PRIVATE-TOKEN: ${ANSIBLE_VAR_TOKEN}" \
"${CI_API_V4_URL}/projects/2836/repository/files/$ENV/raw?ref=main" \
-o CRI_XCBC/group_vars/$ENV
sed -i -E "s/(lts_access_key: ).*/\1\"${AWS_ACCESS_KEY_ID}\"/" CRI_XCBC/group_vars/$ENV
sed -i -E "s/(lts_secret_key: ).*/\1\"${AWS_SECRET_ACCESS_KEY}\"/" CRI_XCBC/group_vars/$ENV
sed -i -E "s/(user_register_app_key: ).*/\1\"${SELF_REG_APP_KEY}\"/" CRI_XCBC/group_vars/$ENV
sed -i -E "s/(celery_user_password: ).*/\1\"${CELERY_PASSWD}\"/" CRI_XCBC/group_vars/$ENV
sed -i -E "s|(ssh_pub_key: ).*|\1\"{{ lookup('file', '${SSH_PUB_KEY}') }}\"|" CRI_XCBC/group_vars/$ENV
fi
- >
PKR_VAR_build_instance_name="ood-${CRI_XCBC_HEAD}"
PKR_VAR_image_date_suffix=false
packer build -machine-readable openstack-ood | tee ood_build.log
# packer commands
- packer init openstack-ood
- packer validate openstack-ood
- packer build -machine-readable openstack-ood | tee ood_build.log
- export BUILT_OOD_IMAGE_ID=$(grep 'Image:' ood_build.log | awk '{print $4}')
- echo BUILT_OOD_IMAGE_ID=${BUILT_OOD_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env
- openstack image set --property CRI_XCBC_prod=${CRI_XCBC_prod} --property CRI_XCBC_dev=${CRI_XCBC_dev} --property PACKER_IMAGE_HEAD=${PACKER_IMAGE_HEAD} ${BUILT_OOD_IMAGE_ID}
# set image properties with repo state
- openstack image set --property EXT_PR_SRC_REPO=${EXT_PR_SRC_REPO} --property EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} --property EXT_PR_TARGET_REPO=${EXT_PR_TARGET_REPO} --property EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} --property PACKER_IMAGE_HEAD=${CI_COMMIT_SHORT_SHA} ${BUILT_OOD_IMAGE_ID}
artifacts:
reports:
dotenv: image.env
rules:
- if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "ood"
when: always
# test_ood_image / deploy_http_proxy_node
# Two flows are visible in this span:
#  - a test flow that boots an instance from BUILT_OOD_IMAGE_ID with generated
#    user data, attaches FLOATING_IP, probes the Shibboleth metadata URL with
#    curl, checks over ssh that six "etc/auto" mounts exist, and deletes the
#    instance on failure unless DELETE_WHEN_FAILED is set to something other
#    than true;
#  - a deploy flow that writes a user_data.txt which reroutes the default
#    gateway, clones this repository and runs ansible/cluster.yml locally, then
#    assembles an `openstack server create` command from HTTP_PROXY_* and
#    SECURITY_GROUP_LIST and attaches each address in
#    HTTP_PROXY_FLOATING_IP_LIST.
# The last rule runs the job when PIPELINE_TARGET is "deploy" and
# HTTP_PROXY_IMAGE_ID is set.
# NOTE(review): the two flows are interleaved, including two overlapping
# `cat > user_data.txt` heredocs with different terminators (OEOF / EOF); this
# looks like both sides of the rendered diff. Do not treat the span as one
# runnable script without confirming which lines are current.
test_ood_image:
stage: test
needs: [build_ood_image]
deploy_http_proxy_node:
stage: deploy
environment:
name: knightly
name: $ENV
tags:
- build
script:
- openstack image set --accept $BUILT_OOD_IMAGE_ID
- openstack image set --accept $HTTP_PROXY_IMAGE_ID || true
- FAILED=false
- |
eval $(ssh-agent -s)
chmod 400 "$SSH_PRIV_KEY"
ssh-add "$SSH_PRIV_KEY"
mkdir ~/.ssh
chmod 700 ~/.ssh
- OLD_INSTANCE_IP=$(openstack floating ip list --floating-ip-address $CHEAHA_IP -c "Fixed IP Address" -f value)
- echo $OLD_INSTANCE_IP
- |
if [ ! -z $OLD_INSTANCE_IP ]; then
export OLD_INSTANCE_ID=$(openstack server list --name $OOD_INSTANCE_NAME --ip $OLD_INSTANCE_IP -c ID -f value)
fi
- echo OLD_INSTANCE_ID=$OLD_INSTANCE_ID | tee -a instance.env
- |
cat > user_data.txt << OEOF
cat > user_data.txt <<EOF
#!/bin/bash
echo "Starting user_data: \$(date)"
cat > /etc/resolv.conf << EOF
search openstack.internal cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster
nameserver 172.20.0.25
EOF
cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
[main]
dns=none
EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys
mkdir -p /run/shibboleth
chown shibd:shibd /run/shibboleth
echo "Installing s3cmd: \$(date)"
pip3 install s3cmd
echo "Downloading hostkey via s3cmd: \$(date)"
s3cmd get --force -r --access_key=$AWS_ACCESS_KEY_ID --secret_key=$AWS_SECRET_ACCESS_KEY --host=$AWS_HOST --host-bucket=$AWS_HOST s3://knightly-key/ /etc/ssh/
echo "Download completed: \$(date)"
OEOF
- >
export NEW_INSTANCE_ID=$(openstack server create
-c id -f value --image $BUILT_OOD_IMAGE_ID
--network $OOD_INSTANCE_NETWORK
--security-group ood-https-ports
--security-group node-exporter
--security-group allow-ssh
--user-data user_data.txt
--flavor $INSTANCE_FLAVOR
--wait
$OOD_INSTANCE_NAME)
- echo NEW_INSTANCE_ID=$NEW_INSTANCE_ID | tee -a instance.env
- openstack server add floating ip $NEW_INSTANCE_ID $FLOATING_IP
- >
curl --retry 10 --retry-delay 20 --retry-connrefused https://knightly.rc.uab.edu/Shibboleth.sso/Metadata --resolve knightly.rc.uab.edu:443:$FLOATING_IP -kf
|| FAILED=true
ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
cd /tmp/${CI_PROJECT_NAME}
git checkout ${CI_COMMIT_REF_NAME}
cat >> ansible/hosts<<EEOF
[$ENV]
127.0.0.1
EEOF
ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
rm -rf /tmp/${CI_PROJECT_NAME}
EOF
- |
cp "$SSH_KNOWN_HOSTS" ~/.ssh/known_hosts
chmod 644 ~/.ssh/known_hosts
until ssh acctsvc@$FLOATING_IP hostname; do sleep 5; done
ssh acctsvc@$FLOATING_IP '[ $(mount | grep "etc/auto" | wc -l) -eq 6 ]' || FAILED=true
export cmd="openstack server create"
cmd+=" -c id -f value --image $HTTP_PROXY_IMAGE_ID"
cmd+=" --flavor $INSTANCE_FLAVOR"
for security_group in ${SECURITY_GROUP_LIST[@]};
do
cmd+=" --security-group $security_group"
done
cmd+=" --user-data user_data.txt"
if [ -n "$PROXY_NETWORK" ];then cmd+=" --network $PROXY_NETWORK"; fi
if [ -n "$HTTP_PROXY_PORT" ];then cmd+=" --port $HTTP_PROXY_PORT"; fi
cmd+=" --wait $HTTP_PROXY_INSTANCE_NAME"
- export HTTP_PROXY_INSTANCE_ID=$(bash -c "$cmd")
- |
if [ "$FAILED" = true ]; then
if [ "${DELETE_WHEN_FAILED-true}" = true ]; then
openstack server delete $NEW_INSTANCE_ID
echo "DELETE_BUILT_IMAGE=true" | tee -a instance.env
fi
false
fi
- openstack server remove floating ip $NEW_INSTANCE_ID $FLOATING_IP
artifacts:
reports:
dotenv: instance.env
# Associate the floating IP(s) with the HTTP Proxy instance
for HTTP_PROXY_FLOATING_IP in ${HTTP_PROXY_FLOATING_IP_LIST[@]};
do
echo "Associating FLOATING_IP $HTTP_PROXY_FLOATING_IP with HTTP_PROXY_INSTANCE_ID $HTTP_PROXY_INSTANCE_ID"
openstack server add floating ip $HTTP_PROXY_INSTANCE_ID $HTTP_PROXY_FLOATING_IP
done
rules:
- if: $CI_PIPELINE_SOURCE == "schedule"
- if: $PIPELINE_TARGET == "deploy" && $HTTP_PROXY_IMAGE_ID
when: always
# test_ood_image_mr / deploy_ssh_proxy_node / deploy_review / stop_review
# Flows visible in this span:
#  - a merge-request test flow that boots BUILT_OOD_IMAGE_ID on cicd-net, probes
#    the Shibboleth metadata URL and the "etc/auto" mount count, and on failure
#    deletes both the instance and the image unless DELETE_WHEN_FAILED is set to
#    something other than true;
#  - deploy_review, which only echoes a message and declares a
#    review/$CI_COMMIT_REF_SLUG environment that stops via stop_review after
#    2 days;
#  - stop_review, which deletes the instance, the image and the floating IP;
#  - an SSH-proxy deploy flow that writes user_data.txt, assembles an
#    `openstack server create` command from SSH_PROXY_* and SECURITY_GROUP_LIST,
#    and attaches each address in SSH_PROXY_FLOATING_IP_LIST.
# The last rule runs the job when PIPELINE_TARGET is "deploy" and
# SSH_PROXY_IMAGE_ID is set.
# NOTE(review): these flows are interleaved (job keys of deploy_review and
# stop_review appear between script lines of the SSH-proxy flow); this looks
# like both sides of the rendered diff. Confirm which lines are current.
test_ood_image_mr:
stage: test
needs: [build_ood_image]
deploy_ssh_proxy_node:
stage: deploy
environment:
name: $ENV
tags:
- build
script:
- export OOD_INSTANCE_NETWORK="cicd-net"
- openstack image set --accept $SSH_PROXY_IMAGE_ID || true
- FAILED=false
- |
eval $(ssh-agent -s)
chmod 400 "$SSH_PRIV_KEY"
ssh-add "$SSH_PRIV_KEY"
mkdir ~/.ssh
chmod 700 ~/.ssh
- |
cat > user_data.txt << OEOF
cat > user_data.txt <<EOF
#!/bin/bash
cat > /etc/resolv.conf << EOF
search openstack.internal cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster
nameserver 172.20.0.25
EOF
cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
[main]
dns=none
EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys
mkdir -p /run/shibboleth
chown shibd:shibd /run/shibboleth
OEOF
- >
export NEW_INSTANCE_ID=$(openstack server create
-c id -f value --image $BUILT_OOD_IMAGE_ID
--network $OOD_INSTANCE_NETWORK
--security-group ood-https-ports
--security-group allow-ssh
--user-data user_data.txt
--flavor $INSTANCE_FLAVOR
--wait
$OOD_INSTANCE_NAME)
- echo NEW_INSTANCE_ID=$NEW_INSTANCE_ID | tee -a instance.env
- openstack server add floating ip $NEW_INSTANCE_ID $FLOATING_IP
- >
curl --retry 10 --retry-delay 20 --retry-connrefused https://knightly.rc.uab.edu/Shibboleth.sso/Metadata --resolve knightly.rc.uab.edu:443:$FLOATING_IP -kf
|| FAILED=true
- ssh -o StrictHostKeyChecking=no acctsvc@$FLOATING_IP '[ $(mount | grep "etc/auto" | wc -l) -eq 6 ]' || FAILED=true
ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
cd /tmp/${CI_PROJECT_NAME}
git checkout ${CI_COMMIT_REF_NAME}
cat >> ansible/hosts<<EEOF
[$ENV]
127.0.0.1
EEOF
ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
rm -rf /tmp/${CI_PROJECT_NAME}
EOF
- |
if [ "$FAILED" = true ]; then
if [ "${DELETE_WHEN_FAILED-true}" = true ]; then
openstack server delete $NEW_INSTANCE_ID
openstack image delete $BUILT_OOD_IMAGE_ID
fi
false
fi
artifacts:
reports:
dotenv: instance.env
rules:
- if: $CI_MERGE_REQUEST_ID
deploy_review:
stage: deploy
script:
- echo "Deploy Review App"
environment:
name: review/$CI_COMMIT_REF_SLUG
url: https://$CI_COMMIT_REF_SLUG.$FLOATING_IP.nip.io
on_stop: stop_review
auto_stop_in: 2 days
tags:
- build
rules:
- if: $CI_MERGE_REQUEST_ID
stop_review:
stage: deploy
script:
- openstack server delete $NEW_INSTANCE_ID
- openstack image delete $BUILT_OOD_IMAGE_ID
- openstack floating ip delete $FLOATING_IP
environment:
name: review/$CI_COMMIT_REF_SLUG
action: stop
tags:
- build
export cmd="openstack server create"
cmd+=" -c id -f value --image $SSH_PROXY_IMAGE_ID"
cmd+=" --flavor $INSTANCE_FLAVOR"
for security_group in ${SECURITY_GROUP_LIST[@]};
do
cmd+=" --security-group $security_group"
done
cmd+=" --user-data user_data.txt"
if [ -n "$PROXY_NETWORK" ];then cmd+=" --network $PROXY_NETWORK"; fi
if [ -n "$SSH_PROXY_PORT" ];then cmd+=" --port $SSH_PROXY_PORT"; fi
cmd+=" --wait $SSH_PROXY_INSTANCE_NAME"
- export SSH_PROXY_INSTANCE_ID=$(bash -c "$cmd")
- |
# Associate the floating IP(s) with the SSH Proxy instance
for SSH_PROXY_FLOATING_IP in ${SSH_PROXY_FLOATING_IP_LIST[@]};
do
echo "Associating FLOATING_IP $SSH_PROXY_FLOATING_IP with SSH_PROXY_INSTANCE_ID $SSH_PROXY_INSTANCE_ID"
openstack server add floating ip $SSH_PROXY_INSTANCE_ID $SSH_PROXY_FLOATING_IP
done
rules:
- if: $CI_MERGE_REQUEST_ID
when: manual
- if: $PIPELINE_TARGET == "deploy" && $SSH_PROXY_IMAGE_ID
when: always
# deploy_knightly / deploy_login_node / deploy_cheaha / cleanup_knightly
# Flows visible in this span:
#  - a knightly flow that moves CAMPUS_IP and CHEAHA_IP from OLD_INSTANCE_ID to
#    NEW_INSTANCE_ID (schedules only);
#  - deploy_cheaha, a manual placeholder job restricted to `main`;
#  - cleanup_knightly, which lists servers named OOD_INSTANCE_NAME and deletes
#    those beyond NUM_SERVER_TO_KEEP, never counting or deleting OLD_INSTANCE_ID;
#  - a login-node deploy flow that writes user_data.txt (default route, repo
#    clone, s3cmd download of ansible files, local ansible/cluster.yml run),
#    assembles an `openstack server create` command from LOGIN_* and
#    SECURITY_GROUP_LIST, and attaches each address in LOGIN_FLOATING_IP_LIST.
# The last rule runs the job when PIPELINE_TARGET is "deploy" and
# LOGIN_IMAGE_ID is set.
# NOTE(review): the in-script comment before the LOGIN_FLOATING_IP_LIST loop
# says "SSH Proxy" although the loop handles the login instance.
# NOTE(review): these flows are interleaved; this looks like both sides of the
# rendered diff. Confirm which lines are current before reuse.
deploy_knightly:
deploy_login_node:
stage: deploy
environment:
name: knightly
name: $ENV
tags:
- build
script:
- openstack image set --accept $LOGIN_IMAGE_ID || true
- FAILED=false
- |
if [ ! -z $OLD_INSTANCE_ID ]; then
openstack server remove floating ip $OLD_INSTANCE_ID $CAMPUS_IP
openstack server remove floating ip $OLD_INSTANCE_ID $CHEAHA_IP
fi
cat > user_data.txt <<EOF
#!/bin/bash
cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
[main]
dns=none
EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys
ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
cd /tmp/${CI_PROJECT_NAME}
git checkout ${CI_COMMIT_REF_NAME}
cat >> ansible/hosts<<EEOF
[$ENV]
127.0.0.1
EEOF
s3cmd get --force -r --access_key=$AWS_ACCESS_KEY_ID --secret_key=$AWS_SECRET_ACCESS_KEY --host=$AWS_HOST --host-bucket=$AWS_HOST s3://cheaha-cloud-ansible-files/ /tmp/${CI_PROJECT_NAME}/ansible/files/
ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
rm -rf /tmp/${CI_PROJECT_NAME}
EOF
- |
if [ ! -z $NEW_INSTANCE_ID ]; then
openstack server add floating ip $NEW_INSTANCE_ID $CAMPUS_IP
openstack server add floating ip $NEW_INSTANCE_ID $CHEAHA_IP
fi
only:
- schedules
deploy_cheaha:
stage: deploy
environment:
name: cheaha
tags:
- build
script:
- echo "Job placeholder to deploy to Cheaha"
when: manual
only:
- main
cleanup_knightly:
stage: cleanup
environment:
name: knightly
tags:
- build
script:
- >
SERVER_TO_BE_DELETE=($(openstack server list --name $OOD_INSTANCE_NAME --sort-column Image --sort-descending -f value -c ID
| awk -v NSTK=$NUM_SERVER_TO_KEEP -v OID=$OLD_INSTANCE_ID '$0 != OID {count++}
$0 != OID && count>NSTK {print}'))
export cmd="openstack server create"
cmd+=" -c id -f value --image $LOGIN_IMAGE_ID"
cmd+=" --flavor $INSTANCE_FLAVOR"
for security_group in ${SECURITY_GROUP_LIST[@]};
do
cmd+=" --security-group $security_group"
done
cmd+=" --user-data user_data.txt"
if [ -n "$INSTANCE_NETWORK" ];then cmd+=" --network $INSTANCE_NETWORK"; fi
if [ -n "$LOGIN_PORT" ];then cmd+=" --port $LOGIN_PORT"; fi
cmd+=" --wait $LOGIN_INSTANCE_NAME"
- export LOGIN_INSTANCE_ID=$(bash -c "$cmd")
- |
for svr in ${SERVER_TO_BE_DELETE[@]}; do
echo "Deleting server $svr"
openstack server delete ${svr}
# Associate the floating IP(s) with the SSH Proxy instance
for LOGIN_FLOATING_IP in ${LOGIN_FLOATING_IP_LIST[@]};
do
echo "Associating FLOATING_IP $LOGIN_FLOATING_IP with LOGIN_INSTANCE_ID $LOGIN_INSTANCE_ID"
openstack server add floating ip $LOGIN_INSTANCE_ID $LOGIN_FLOATING_IP
done
rules:
- if: $CI_PIPELINE_SOURCE == "schedule"
- if: $PIPELINE_TARGET == "deploy" && $LOGIN_IMAGE_ID
when: always
# cleanup_integration / deploy_ood_node (job header and first script steps)
# Visible steps: resolve the project ID that owns the application credential,
# write that project's images (Name and ID columns, sorted by Name descending)
# to images.txt, and delete BUILT_OOD_IMAGE_ID when DELETE_BUILT_IMAGE is true.
# NOTE(review): two job keys and two `stage:` values share this header; this
# looks like both sides of the rendered diff. Confirm which job owns the steps.
cleanup_integration:
stage: cleanup
deploy_ood_node:
stage: deploy
environment:
name: $ENV
tags:
- build
script:
- OS_PROJECT_ID=$(openstack application credential show $OS_APPLICATION_CREDENTIAL_ID -f value -c project_id)
- openstack image list --sort-column Name --sort-descending -f value -c Name -c ID --property owner=$OS_PROJECT_ID > images.txt
- |
if [ "${DELETE_BUILT_IMAGE-false}" = true ]; then
openstack image delete $BUILT_OOD_IMAGE_ID
fi
# Build one deletion list per image family (ood, base, compute, gpu) from
# images.txt. For each family, awk collects the first field of every line that
# matches "<family>-<timestamp>" and prints the entries after the first
# NUM_IMAGE_TO_KEEP matches; those become the bash array consumed by the
# delete loops.
# The timestamp pattern is read from TIMESTAMP_REGEXP, the name declared in the
# pipeline variables. These steps previously read TIMESTAMP_REGEX, which is not
# defined anywhere in the visible pipeline; with that name empty the filter
# shrinks to a bare "<family>-" prefix and also matches "<family>-PR-<iid>"
# images. NOTE(review): confirm TIMESTAMP_REGEX is not set in project CI/CD
# settings.
# The -v values are quoted so the brackets in the pattern are never subject to
# shell globbing.
- >
  OOD_IMAGE_TO_BE_DELETE=($(cat images.txt
  | awk -v NITK="$NUM_IMAGE_TO_KEEP" -v REGEX="ood-$TIMESTAMP_REGEXP"
  '{if ($0 ~ REGEX) result[count++] = $1}
  END {for(i=NITK;i<count;i++) print result[i]}'))
- >
  BASE_IMAGE_TO_BE_DELETE=($(cat images.txt
  | awk -v NITK="$NUM_IMAGE_TO_KEEP" -v REGEX="base-$TIMESTAMP_REGEXP"
  '{if ($0 ~ REGEX) result[count++] = $1}
  END {for(i=NITK;i<count;i++) print result[i]}'))
- >
  COMPUTE_IMAGE_TO_BE_DELETE=($(cat images.txt
  | awk -v NITK="$NUM_IMAGE_TO_KEEP" -v REGEX="compute-$TIMESTAMP_REGEXP"
  '{if ($0 ~ REGEX) result[count++] = $1}
  END {for(i=NITK;i<count;i++) print result[i]}'))
- >
  GPU_IMAGE_TO_BE_DELETE=($(cat images.txt
  | awk -v NITK="$NUM_IMAGE_TO_KEEP" -v REGEX="gpu-$TIMESTAMP_REGEXP"
  '{if ($0 ~ REGEX) result[count++] = $1}
  END {for(i=NITK;i<count;i++) print result[i]}'))
# Remaining steps of cleanup_integration / deploy_ood_node / cleanup_mr.
# Flows visible in this span:
#  - cleanup loops that delete every image ID held in the
#    OOD/BASE/COMPUTE/GPU_IMAGE_TO_BE_DELETE arrays (schedule pipelines);
#  - cleanup_mr, which deletes images whose listing line matches
#    "(ood|base|compute|gpu)-PR-$CI_MERGE_REQUEST_IID" (merge request pipelines);
#  - an OOD-node deploy flow that writes user_data.txt (default route, repo
#    clone, s3cmd download of ansible files, local ansible/cluster.yml run),
#    assembles an `openstack server create` command from OOD_* and
#    SECURITY_GROUP_LIST, and attaches each address in OOD_FLOATING_IP_LIST.
# The last rule runs the job when PIPELINE_TARGET is "deploy" and OOD_IMAGE_ID
# is set.
# NOTE(review): the in-script comment before the OOD_FLOATING_IP_LIST loop says
# "SSH Proxy" although the loop handles the OOD instance.
# NOTE(review): these flows are interleaved (several `for` loops have no `done`
# of their own); this looks like both sides of the rendered diff. Confirm which
# lines are current before reuse.
- |
for img in ${OOD_IMAGE_TO_BE_DELETE[@]}; do
echo "Deleting image $img"
openstack image delete ${img}
done
- |
for img in ${BASE_IMAGE_TO_BE_DELETE[@]}; do
echo "Deleting image $img"
openstack image delete ${img}
done
- openstack image set --accept $OOD_IMAGE_ID || true
- FAILED=false
- |
for img in ${COMPUTE_IMAGE_TO_BE_DELETE[@]}; do
echo "Deleting image $img"
openstack image delete ${img}
done
cat > user_data.txt <<EOF
#!/bin/bash
cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
[main]
dns=none
EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys
ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
cd /tmp/${CI_PROJECT_NAME}
git checkout ${CI_COMMIT_REF_NAME}
cat >> ansible/hosts<<EEOF
[$ENV]
127.0.0.1
EEOF
s3cmd get --force -r --access_key=$AWS_ACCESS_KEY_ID --secret_key=$AWS_SECRET_ACCESS_KEY --host=$AWS_HOST --host-bucket=$AWS_HOST s3://cheaha-cloud-ansible-files/ /tmp/${CI_PROJECT_NAME}/ansible/files/
ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
rm -rf /tmp/${CI_PROJECT_NAME}
EOF
- |
for img in ${GPU_IMAGE_TO_BE_DELETE[@]}; do
echo "Deleting image $img"
openstack image delete ${img}
export cmd="openstack server create"
cmd+=" -c id -f value --image $OOD_IMAGE_ID"
cmd+=" --flavor $INSTANCE_FLAVOR"
for security_group in ${SECURITY_GROUP_LIST[@]};
do
cmd+=" --security-group $security_group"
done
rules:
- if: $CI_PIPELINE_SOURCE == "schedule"
when: always
cleanup_mr:
stage: cleanup
tags:
- build
script:
- OS_PROJECT_ID=$(openstack application credential show $OS_APPLICATION_CREDENTIAL_ID -f value -c project_id)
- >
IMAGE_TO_BE_DELETE=($(openstack image list --sort-column Name --sort-descending -f value -c Name -c ID --property owner=$OS_PROJECT_ID
| awk -v REGEX="(ood|base|compute|gpu)-PR-$CI_MERGE_REQUEST_IID" '{if ($0 ~ REGEX) print $1}'))
cmd+=" --user-data user_data.txt"
if [ -n "$INSTANCE_NETWORK" ];then cmd+=" --network $INSTANCE_NETWORK"; fi
if [ -n "$OOD_PORT" ];then cmd+=" --port $OOD_PORT"; fi
cmd+=" --wait $OOD_INSTANCE_NAME"
- export OOD_INSTANCE_ID=$(bash -c "$cmd")
- |
for img in ${IMAGE_TO_BE_DELETE[@]}; do
echo "Deleting image $img"
openstack image delete ${img}
# Associate the floating IP(s) with the SSH Proxy instance
for OOD_FLOATING_IP in ${OOD_FLOATING_IP_LIST[@]};
do
echo "Associating FLOATING_IP $OOD_FLOATING_IP with OOD_INSTANCE_ID $OOD_INSTANCE_ID"
openstack server add floating ip $OOD_INSTANCE_ID $OOD_FLOATING_IP
done
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $PIPELINE_TARGET == "deploy" && $OOD_IMAGE_ID
when: always
---
# Play applied to the `default` host group with privilege escalation. Each role
# carries a tag equal to its own name so it can be selected with --tags.
- name: Setup node for use as a virtual cheaha node
  hosts: default
  become: true
  roles:
    - name: cheaha.node
      tags: cheaha.node
    - name: nfs_mounts
      tags: nfs_mounts
    - name: ldap_config
      tags: ldap_config
    - name: slurm_client
      tags: slurm_client
---
# Play applied to every host in the inventory with privilege escalation.
# Each role carries a tag equal to its own name; roles with a `when` are
# applied only if the named enable_* variable evaluates true.
- name: Setup node for use as a virtual cheaha node
  hosts: all
  become: true
  roles:
    - name: cheaha.node
      tags: cheaha.node
    - name: nfs_mounts
      tags: nfs_mounts
      when: enable_nfs_mounts
    - name: ldap_config
      tags: ldap_config
    - name: slurm_client
      tags: slurm_client
      when: enable_slurm_client
    - name: ssh_host_keys
      tags: ssh_host_keys
    - name: ssh_proxy_config
      tags: ssh_proxy_config
      when: enable_ssh_proxy_config
    - name: ssl_cert
      tags: ssl_cert
      when: enable_ssl_certs
    - name: rsyslog_config
      tags: rsyslog_config
      when: enable_rsyslog_config
    - name: rewrite_map
      tags: rewrite_map
      when: enable_rewrite_map
    - name: fail2ban
      tags: fail2ban
      when: enable_fail2ban
    - name: install_node_exporter
      tags: install_node_exporter
      when: enable_node_exporter
......@@ -4,9 +4,88 @@
yum_repo_files: []
pkg_list: []
slurm_version: 18.08.9
enable_slurm_client: false
# NHC related
nhc_download_url: "https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3-1.el7.noarch.rpm"
nhc_download_path: "/tmp"
nhc_git_repo: "https://gitlab.rc.uab.edu/rc/nhc.git"
nhc_git_repo_path: "/tmp/nhc"
root_ssh_key: ""
# cheaha.node related
hostname_lookup_table:
- "10.141.255.254 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
domain_search_list:
- openstack.internal
- cm.cluster
nameserver_list:
- 10.141.255.254
# ldap_config related
ldap_cert_path: "/etc/openldap/certs"
ldap_uri: "ldap://ldapserver"
# nfs_mounts related
enable_nfs_mounts: true
use_autofs: false
use_fstab: false
mount_points:
- { "src": "master:/gpfs4", "path": "/gpfs4", "opts": "ro,sync,hard", "mode": "0755" }
- { "src": "master:/gpfs5", "path": "/gpfs5", "opts": "ro,sync,hard", "mode": "0755" }
autofs_mounts:
- { "src": "master:/gpfs4/&", "path": "/gpfs4", "opts": "fstype=nfs,vers=3,_netdev,default", "mode": '0755', "mount_point": "/gpfs4", "map_name": "gpfs4", key: "*" }
- { "src": "master:/gpfs5/&", "path": "/gpfs5", "opts": "fstype=nfs,vers=3,_netdev,default", "mode": '0755', "mount_point": "/gpfs5", "map_name": "gpfs5", key: "*" }
# SSH Host Keys
# NOTE(review): presumably the S3 endpoint, bucket and object the
# ssh_host_keys role reads from - confirm against that role.
S3_ENDPOINT: ""
SSH_HOST_KEYS_S3_BUCKET: ""
SSH_HOST_KEYS_S3_OBJECT: ""
# AWS credentials
# Empty placeholders only; NOTE(review): presumably supplied at run time -
# keep real values out of the repository.
LTS_ACCESS_KEY: ""
LTS_SECRET_KEY: ""
# ssh proxy
# Gates the ssh_proxy_config role.
enable_ssh_proxy_config: false
sshpiper_dest_dir: "/opt/sshpiper"
# rsyslog
# Gates the rsyslog_config role.
enable_rsyslog_config: true
rsyslog_target: "*.* @master:514"
# ssl certs
# Gates the ssl_cert role.
enable_ssl_certs: false
ssl_cert_s3_bucket: ""
ssl_cert_key_location: "/etc/pki/tls/private"
ssl_cert_file_location: "/etc/pki/tls/certs"
ssl_cert_key: ""
ssl_cert_file: ""
ssl_cert_chain_file: ""
ssl_apache_config: ""
apache_service: "httpd"
# rewrite map
# Gates the rewrite_map role.
enable_rewrite_map: false
# One entry per target group: group name, the login host it maps to, and
# whether it is flagged as the default group.
target_groups:
  - name: "gpfs4"
    host: "login001"
    default: true
  - name: "gpfs5"
    host: "login002"
    default: false
# account app
account_app_port: 8000
# fail2ban
# Gates the fail2ban role.
enable_fail2ban: false
# maxretry and findtime are rendered into the sshpiperd jail template.
maxretry: 1
findtime: 600
# bantime and fail2ban_white_list are rendered into the [DEFAULT] section
# of jail.local (as bantime / ignoreip).
bantime: 1200
fail2ban_white_list: "127.0.0.1/8"
# Node Exporter
# Gates the install_node_exporter role.
enable_node_exporter: false
# Release version; used in the download URL and in the archive name below.
node_exporter_ver: "1.8.2"
node_exporter_filename: "node_exporter-{{ node_exporter_ver }}.linux-amd64"
# System account and group the service unit runs under.
node_exporter_user: node_exporter
node_exporter_group: node_exporter
# TCP port used for --web.listen-address and for the firewalld rule.
node_exporter_port: 9100
---
# cheaha.node related
# Lines written to /etc/hosts, one per entry.
hostname_lookup_table:
  - "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02"
  - "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
  - "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"

# Search domains for the resolv.conf template, in lookup order.
domain_search_list:
  - cm.cluster
  - rc.uab.edu
  - ib.cluster
  - drac.cluster
  - eth.cluster
  - ib-hdr.cluster

nameserver_list:
  - 172.20.0.25

# LDAP client settings: certificates live under the Bright openldap tree and
# the ldaps:// scheme turns on the TLS section of nslcd.conf.
bright_openldap_path: "/cm/local/apps/openldap"
ldap_cert_path: "{{bright_openldap_path}}/etc/certs"
ldap_uri: "ldaps://ldapserver"

# proxy_config
# Per-group login host plus the per-user key locations; $DOWNSTREAM_USER is
# kept literal here.
target_groups:
  - name: "gpfs5"
    host: "login002"
    default: false
    authorized_keys: "/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys"
    private_key: "/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"
  - name: "gpfs4"
    host: "login001"
    default: true
    authorized_keys: "/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys"
    private_key: "/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"
......@@ -5,8 +5,5 @@
roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' }
- { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' }
- { name: 'install_nhc', tags: 'install_nhc'}
- name: Setup node for use as a virtual cheaha node
ansible.builtin.import_playbook: cheaha.yml
......@@ -6,6 +6,3 @@
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' }
- { name: 'install_zsh', tags: 'install_zsh' }
- name: Setup node for use as a virtual cheaha node
ansible.builtin.import_playbook: cheaha.yml
......@@ -4,15 +4,24 @@
path: /etc/hosts
line: "{{ item }}"
loop:
- "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02"
- "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
- "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
"{{ hostname_lookup_table }}"
# Inserts the "append domain-name" directive at the very top of
# dhclient.conf, creating the file when it does not exist yet.
- name: Add proper DNS search to lookup other nodes on the cluster
  ansible.builtin.lineinfile:
    state: present
    create: true
    path: /etc/dhcp/dhclient.conf
    insertbefore: BOF
    line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";'
# Renders /etc/resolv.conf from resolv.conf.j2 and keeps a backup of the
# file it replaces.
- name: Template resolv.conf
  ansible.builtin.template:
    src: resolv.conf.j2
    dest: /etc/resolv.conf
    owner: root
    group: root
    # Quoted so the mode is always passed as an octal string and never
    # re-interpreted as a number by the YAML loader.
    mode: "0644"
    backup: true
- name: Disable SELinux
ansible.posix.selinux:
......@@ -25,6 +34,7 @@
owner: root
group: root
mode: 0644
when: "'cm.repo' in yum_repo_files"
- name: Add ssh key for root access
ansible.posix.authorized_key:
......@@ -35,3 +45,7 @@
- name: Set timezone to America/Chicago
community.general.timezone:
name: America/Chicago
retries: 3
delay: 3
register: result
until: not result.failed
{# One "search" line built from domain_search_list, then one "nameserver" line per nameserver_list entry. #}
search {{ domain_search_list | join(' ') }}
{% for name_server in nameserver_list %}
nameserver {{ name_server }}
{% endfor %}
---
# Both packages are handed to the package module as one list so they are
# resolved and installed in a single transaction instead of one module
# invocation per loop item.
- name: Install fail2ban
  ansible.builtin.package:
    name:
      - fail2ban
      - fail2ban-firewalld
    state: present
# Renders the global jail defaults plus the sshpiperd filter and jail.
# Ownership and mode are stated explicitly (as the other template tasks in
# this repository do) so the result does not depend on the remote umask.
- name: Configure fail2ban
  ansible.builtin.template:
    src: "{{ item.src }}"
    dest: "{{ item.dest }}"
    owner: root
    group: root
    mode: "0644"
    backup: true
  loop:
    - src: 'jail.local.j2'
      dest: '/etc/fail2ban/jail.local'
    - src: 'sshpiperd_filter.local.j2'
      dest: '/etc/fail2ban/filter.d/sshpiperd.local'
    - src: 'sshpiperd_jail.local.j2'
      dest: '/etc/fail2ban/jail.d/sshpiperd.local'
# Renames the packaged 00-firewalld.conf to 00-firewalld.local.
# `creates` makes the task idempotent: once the .local file exists the mv is
# skipped, instead of failing on a second run because the source is gone.
- name: Activate the firewalld support for fail2ban
  ansible.builtin.command:
    cmd: mv /etc/fail2ban/jail.d/00-firewalld.conf /etc/fail2ban/jail.d/00-firewalld.local
    creates: /etc/fail2ban/jail.d/00-firewalld.local
# Permanently opens TCP ports 2222 and 22 in the public zone.
- name: Configure firewalld to allow ssh and sshpiper traffic
  ansible.posix.firewalld:
    zone: public
    port: "{{ item }}"
    permanent: true
    state: enabled
  loop:
    - "2222/tcp"
    - "22/tcp"
# firewalld is restarted first, then fail2ban; both are enabled at boot.
- name: Enable and start firewalld
  ansible.builtin.service:
    name: firewalld
    state: restarted
    enabled: true

- name: Enable and start fail2ban
  ansible.builtin.service:
    name: fail2ban
    state: restarted
    enabled: true
# Global jail defaults: bans are applied through firewalld; the ban time and
# the ignore list come from the bantime / fail2ban_white_list variables.
[DEFAULT]
banaction = firewalld
bantime = {{ bantime }}
ignoreip = {{ fail2ban_white_list }}
# Turns the sshd jail on.
[sshd]
enabled = true
# fail2ban filter definition for sshpiperd log lines.
# Refer to https://github.com/fail2ban/fail2ban/wiki/Developing-Regex-in-Fail2ban for developing regex using fail2ban
#
[INCLUDES]
before = common.conf
[DEFAULT]
_daemon = sshpiperd
# Timestamp of the form YYYY-MM-DDTHH:MM:SS followed by a +hh:mm / -hh:mm offset or "Z".
__iso_datetime = "\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:\d{2}|Z)"
# Log-line prefix: "time=<timestamp> level=debug" or "level=error".
__pref = time=%(__iso_datetime)s level=(?:debug|error)
[Definition]
# Define the prefix regex for the log lines
prefregex = ^<F-MLFID>%(__prefix_line)s%(__pref)s</F-MLFID>\s+<F-CONTENT>.+</F-CONTENT>$
# Failregex to match the specific failure log lines (prefregex is automatically included)
failregex = ^msg="connection from .*failtoban: ip <HOST> too auth many failures"$
ignoreregex =
mode = normal
maxlines = 1
# This configuration will block the remote host after {{maxretry}} failed SSH login attempts.
# The jail applies the "sshpiperd" filter to /var/log/messages; maxretry and
# findtime are taken from the variables of the same name.
[sshpiperd]
enabled = true
filter = sshpiperd
logpath = /var/log/messages
port = 22
maxretry = {{ maxretry }}
backend = auto
findtime = {{ findtime }}
---
# Fetches the release tarball for node_exporter_ver into /tmp. The mode is
# set explicitly so the downloaded file does not depend on the remote umask.
- name: Download node_exporter binary
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_ver }}/{{ node_exporter_filename }}.tar.gz"
    dest: "/tmp/{{ node_exporter_filename }}.tar.gz"
    mode: "0644"

# Unpacks the tarball on the target host itself (remote_src), producing
# /tmp/<node_exporter_filename>/.
- name: Extract node_exporter
  ansible.builtin.unarchive:
    src: "/tmp/{{ node_exporter_filename }}.tar.gz"
    dest: "/tmp"
    remote_src: true
# Dedicated system group, created before the user that references it.
- name: Create system group for user account {{ node_exporter_group }}
  ansible.builtin.group:
    state: present
    name: "{{ node_exporter_group }}"
    system: true

# System user with no login shell; the home path is recorded but the
# directory itself is not created.
- name: Create system user account {{ node_exporter_user }}
  ansible.builtin.user:
    state: present
    name: "{{ node_exporter_user }}"
    group: "{{ node_exporter_group }}"
    system: true
    comment: Prometheus node_exporter system account
    shell: /sbin/nologin
    home: /var/lib/node_exporter
    create_home: false
# Installs the extracted binary from /tmp on the target host (remote_src)
# into /usr/local/bin. The mode is quoted so it is always passed as an octal
# string rather than a YAML-interpreted number.
- name: Copy node_exporter binary
  ansible.builtin.copy:
    src: "/tmp/{{ node_exporter_filename }}/node_exporter"
    dest: /usr/local/bin/node_exporter
    remote_src: true
    owner: root
    group: root
    mode: "0755"

# Renders the systemd unit that starts node_exporter.
- name: Copy systemd unit file
  ansible.builtin.template:
    src: node_exporter.service.j2
    dest: /etc/systemd/system/node_exporter.service
    owner: root
    group: root
    mode: "0644"
# Removes the downloaded tarball and the extracted directory.
- name: Clean up /tmp
  ansible.builtin.file:
    state: absent
    path: "/tmp/{{ item }}"
  loop:
    - "{{ node_exporter_filename }}.tar.gz"
    - "{{ node_exporter_filename }}"

# Reloads systemd, then enables and restarts the service.
- name: Restart node_exporter service
  ansible.builtin.systemd:
    name: node_exporter
    daemon_reload: true
    enabled: true
    state: restarted
# Populates ansible_facts.services, which the two firewalld tasks below use
# to act only when firewalld exists and is currently running.
- name: Collect facts about system services
  ansible.builtin.service_facts:

- name: Configure firewalld to allow prometheus
  ansible.posix.firewalld:
    zone: public
    port: "{{ node_exporter_port }}/tcp"
    permanent: true
    state: enabled
  when:
    - "'firewalld.service' in ansible_facts.services"
    - ansible_facts.services["firewalld.service"].state == "running"

- name: Enable and start firewalld
  ansible.builtin.service:
    name: firewalld
    state: restarted
    enabled: true
  when:
    - "'firewalld.service' in ansible_facts.services"
    - ansible_facts.services["firewalld.service"].state == "running"
# systemd unit for node_exporter, rendered from the node_exporter_* variables.
[Unit]
Description=Node Exporter
After=network.target
[Service]
# The service runs as the templated user and group.
User={{ node_exporter_user }}
Group={{ node_exporter_group }}
Type=simple
# Listens on the templated port; the two regexes exclude the listed mount
# points and filesystem types from the filesystem collector.
ExecStart=/usr/local/bin/node_exporter --web.listen-address=:{{ node_exporter_port }} --collector.filesystem.mount-points-exclude "^/(dev|proc|run/user/.+|run/credentials/.+|sys|var/lib/docker/.+)($|/)" --collector.filesystem.fs-types-exclude "^(autofs|binfmt_misc|bpf|cgroup|tmpfs|sunrpc|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
[Install]
WantedBy=multi-user.target
......@@ -7,7 +7,6 @@
- nss-pam-ldapd
- openldap
- openldap-clients
- openldap-servers
- sssd-ldap
- name: Update nsswitch.conf to look for ldap
......@@ -25,7 +24,7 @@
- name: Copy ldap cert(s) into place
ansible.builtin.copy:
src: "{{ item.src }}"
dest: "/cm/local/apps/openldap/etc/certs/{{ item.src }}"
dest: "{{ ldap_cert_path }}/{{ item.src }}"
owner: ldap
group: ldap
mode: 0440
......@@ -33,10 +32,11 @@
- { src: ca.pem }
- { src: ldap.key }
- { src: ldap.pem }
when: ldap_uri | regex_search('^ldaps://')
- name: Copy ldap config into place
ansible.builtin.copy:
src: nslcd.conf
ansible.builtin.template:
src: nslcd.conf.j2
dest: /etc/nslcd.conf
owner: root
group: root
......@@ -46,5 +46,6 @@
ansible.builtin.service:
name: "{{ item }}"
enabled: yes
state: restarted
loop:
- nslcd
# This is the configuration file for the LDAP nameservice
# switch library's nslcd daemon. It configures the mapping
# between NSS names (see /etc/nsswitch.conf) and LDAP
# information in the directory.
# See the manual page nslcd.conf(5) for more information.
# The user and group nslcd should run as.
uid nslcd
gid ldap
# The uri pointing to the LDAP server to use for name lookups.
# Multiple entries may be specified. The address that is used
# here should be resolvable without using LDAP (obviously).
#uri ldap://127.0.0.1/
#uri ldaps://127.0.0.1/
#uri ldapi://%2fvar%2frun%2fldapi_sock/
# Note: %2f encodes the '/' used as directory separator
uri {{ ldap_uri }}
# The LDAP version to use (defaults to 3
# if supported by client library)
#ldap_version 3
# The distinguished name of the search base.
base dc=cm,dc=cluster
# The distinguished name to bind to the server with.
# Optional: default is to bind anonymously.
#binddn cn=proxyuser,dc=example,dc=com
# The credentials to bind with.
# Optional: default is no credentials.
# Note that if you set a bindpw you should check the permissions of this file.
#bindpw secret
# The distinguished name to perform password modifications by root by.
#rootpwmoddn cn=admin,dc=example,dc=com
# The default search scope.
#scope sub
#scope one
#scope base
# Customize certain database lookups.
#base group ou=Groups,dc=example,dc=com
#base passwd ou=People,dc=example,dc=com
#base shadow ou=People,dc=example,dc=com
#scope group onelevel
#scope hosts sub
# Bind/connect timelimit.
#bind_timelimit 30
# Search timelimit.
#timelimit 30
# Idle timelimit. nslcd will close connections if the
# server has not been contacted for the number of seconds.
idle_timelimit 240
# Use StartTLS without verifying the server certificate.
#ssl start_tls
#tls_reqcert never
{% if ldap_uri | regex_search('^ldaps://') %}
# TLS settings, rendered only when ldap_uri uses the ldaps:// scheme.
ssl on
tls_reqcert demand
# CA certificates for server certificate verification
#tls_cacertdir /etc/ssl/certs
# Paths follow ldap_cert_path, the directory the certificate files are
# copied into, so the two cannot drift apart.
tls_cacertfile {{ ldap_cert_path }}/ca.pem
tls_cert {{ ldap_cert_path }}/ldap.pem
tls_key {{ ldap_cert_path }}/ldap.key
{% endif %}
# Seed the PRNG if /dev/urandom is not provided
#tls_randfile /var/run/egd-pool
# SSL cipher suite
# See man ciphers for syntax
#tls_ciphers TLSv1
# Client certificate and key
# Use these, if your server requires client authentication.
# Mappings for Services for UNIX 3.5
#filter passwd (objectClass=User)
#map passwd uid msSFU30Name
#map passwd userPassword msSFU30Password
#map passwd homeDirectory msSFU30HomeDirectory
#map passwd homeDirectory msSFUHomeDirectory
#filter shadow (objectClass=User)
#map shadow uid msSFU30Name
#map shadow userPassword msSFU30Password
#filter group (objectClass=Group)
#map group member msSFU30PosixMember
# Mappings for Services for UNIX 2.0
#filter passwd (objectClass=User)
#map passwd uid msSFUName
#map passwd userPassword msSFUPassword
#map passwd homeDirectory msSFUHomeDirectory
#map passwd gecos msSFUName
#filter shadow (objectClass=User)
#map shadow uid msSFUName
#map shadow userPassword msSFUPassword
#map shadow shadowLastChange pwdLastSet
#filter group (objectClass=Group)
#map group member posixMember
# Mappings for Active Directory
#pagesize 1000
#referrals off
#idle_timelimit 800
#filter passwd (&(objectClass=user)(!(objectClass=computer))(uidNumber=*)(unixHomeDirectory=*))
#map passwd uid sAMAccountName
#map passwd homeDirectory unixHomeDirectory
#map passwd gecos displayName
#filter shadow (&(objectClass=user)(!(objectClass=computer))(uidNumber=*)(unixHomeDirectory=*))
#map shadow uid sAMAccountName
#map shadow shadowLastChange pwdLastSet
#filter group (objectClass=group)
# Alternative mappings for Active Directory
# (replace the SIDs in the objectSid mappings with the value for your domain)
#pagesize 1000
#referrals off
#idle_timelimit 800
#filter passwd (&(objectClass=user)(objectClass=person)(!(objectClass=computer)))
#map passwd uid cn
#map passwd uidNumber objectSid:S-1-5-21-3623811015-3361044348-30300820
#map passwd gidNumber objectSid:S-1-5-21-3623811015-3361044348-30300820
#map passwd homeDirectory "/home/$cn"
#map passwd gecos displayName
#map passwd loginShell "/bin/bash"
#filter group (|(objectClass=group)(objectClass=person))
#map group gidNumber objectSid:S-1-5-21-3623811015-3361044348-30300820
# Mappings for AIX SecureWay
#filter passwd (objectClass=aixAccount)
#map passwd uid userName
#map passwd userPassword passwordChar
#map passwd uidNumber uid
#map passwd gidNumber gid
#filter group (objectClass=aixAccessGroup)
#map group cn groupName
#map group gidNumber gid
# This comment prevents repeated auto-migration of settings.
---
# Fixed directories every node gets, each with its own mode.
- name: Create base directories
  ansible.builtin.file:
    state: directory
    path: "{{ item.path }}"
    mode: "{{ item.mode }}"
  loop:
    - path: /local
      mode: '0777'
    - path: /share
      mode: '0755'

# One directory per autofs_mounts entry, using that entry's path and mode.
- name: Create mountpoint dirs
  ansible.builtin.file:
    state: directory
    path: "{{ item.path }}"
    mode: "{{ item.mode }}"
  loop: "{{ autofs_mounts }}"
# Comments out the /misc, /net and +auto.master lines of /etc/auto.master by
# prefixing them with '#'; a backup of the file is kept.
- name: Remove unused entry in master map
  ansible.builtin.replace:
    dest: /etc/auto.master
    regexp: "{{ item.regexp }}"
    replace: "{{ item.replace }}"
    backup: true
  loop:
    - regexp: '^(/misc)'
      replace: '#\1'
    - regexp: '^(/net)'
      replace: '#\1'
    - regexp: '^(\+auto.master)'
      replace: '#\1'

# Adds one "<mount_point> /etc/auto.<map_name>" line per autofs_mounts entry
# to the drop-in master map, creating the file if needed.
- name: Add master map file
  ansible.builtin.lineinfile:
    path: "/etc/auto.master.d/gpfs.autofs"
    create: true
    line: "{{ item.mount_point }} /etc/auto.{{ item.map_name }}"
  loop: "{{ autofs_mounts }}"

# Writes the "<key> -<opts> <src>" entry into each /etc/auto.<map_name>.
- name: Set up autofs map files
  ansible.builtin.lineinfile:
    path: "/etc/auto.{{ item.map_name }}"
    create: true
    line: "{{ item.key }} -{{ item.opts }} {{ item.src }}"
  loop: "{{ autofs_mounts }}"
# Force-creates the /share/apps -> /data/rc/apps symlink, owned by root.
- name: Create symbolic links
  ansible.builtin.file:
    state: link
    src: "{{ item.src }}"
    dest: "{{ item.dest }}"
    force: true
    owner: root
    group: root
  loop:
    - src: /data/rc/apps
      dest: /share/apps

# Enables autofs at boot and restarts it.
- name: Enable and start autofs service
  ansible.builtin.service:
    name: autofs
    state: restarted
    enabled: true
---
# Creates the directory of every mount_points entry with that entry's mode.
- name: Create base directories
  ansible.builtin.file:
    state: directory
    path: "{{ item.path }}"
    mode: "{{ item.mode }}"
  loop: "{{ mount_points }}"

# NFS-mounts every mount_points entry with its own options.
- name: Mount the directories
  ansible.posix.mount:
    state: mounted
    fstype: nfs
    src: "{{ item.src }}"
    path: "{{ item.path }}"
    opts: "{{ item.opts }}"
  loop: "{{ mount_points }}"
---
# Creates the fixed directory tree, each entry with its own mode.
- name: Create base directories
  ansible.builtin.file:
    state: directory
    path: "{{ item.dir }}"
    mode: "{{ item.mode }}"
  loop:
    - dir: /local
      mode: '0777'
    - dir: /scratch
      mode: '0755'
    - dir: /share
      mode: '0755'
    # this is only required for the symlink to be happy
    - dir: /data/rc/apps
      mode: '0755'
    - dir: /data/user
      mode: '0755'
    - dir: /data/project
      mode: '0755'
# Pulls in fstab.yml only when use_fstab is true. The fully qualified module
# name matches the ansible.builtin.* style used by every other task here.
- name: nfs_mounts using fstab
  ansible.builtin.include_tasks: fstab.yml
  when: use_fstab
# NOTE(review): the tasks below mirror autofs.yml but with hard-coded loop
# entries; presumably they are the removed side of this diff - confirm.
# Comments out the /misc, /net and +auto.master lines of /etc/auto.master by
# prefixing them with '#'; a backup of the file is kept.
- name: Remove unused entry in master map
ansible.builtin.replace:
dest: /etc/auto.master
regexp: '{{ item.regexp }}'
replace: '{{ item.replace }}'
backup: true
loop:
- { regexp: '^(/misc)', replace: '#\1' }
- { regexp: '^(/net)', replace: '#\1' }
- { regexp: '^(\+auto.master)', replace: '#\1' }
# Adds one "<mount_point> /etc/auto.<map_name>" line per entry to the drop-in
# master map, creating the file if needed.
- name: Add master map file
ansible.builtin.lineinfile:
path: "/etc/auto.master.d/gpfs.autofs"
line: "{{ item.mount_point }} /etc/auto.{{ item.map_name }}"
create: yes
loop:
- { mount_point: "/cm/shared", map_name: "cm-share" }
- { mount_point: "/data/project", map_name: "data-project" }
- { mount_point: "/data/user", map_name: "data-user" }
- { mount_point: "/data/rc/apps", map_name: "data-rc-apps" }
- { mount_point: "/-", map_name: "scratch" }
- { mount_point: "/home", map_name: "home" }
# Writes the "<key> -<opts> <src>" entry into each /etc/auto.<map_name>.
- name: Set up autofs map files
ansible.builtin.lineinfile:
path: "/etc/auto.{{ item.map_name }}"
line: "{{ item.key }} -{{ item.opts }} {{ item.src }}"
create: true
loop:
- { map_name: "cm-share", key: "*", src: "gpfs.rc.uab.edu:/data/cm/shared-8.2/&", opts: "fstype=nfs,vers=3,_netdev,defaults" }
- { map_name: "data-project", key: "*", src: "gpfs.rc.uab.edu:/data/project/&", opts: "fstype=nfs,vers=3,_netdev,defaults" }
- { map_name: "data-user", key: "*", src: "gpfs.rc.uab.edu:/data/user/&", opts: "fstype=nfs,vers=3,_netdev,local_lock=posix,defaults" }
- { map_name: "data-rc-apps", key: "*", src: "gpfs.rc.uab.edu:/data/rc/apps/&", opts: "fstype=nfs,vers=3,_netdev,defaults" }
- { map_name: "scratch", key: "/scratch", src: "gpfs.rc.uab.edu:/scratch", opts: "fstype=nfs,vers=3,_netdev,local_lock=posix,defaults" }
- { map_name: "home", key: "*", src: ":/data/user/home/&", opts: 'fstype=bind' }
# Force-creates the /share/apps -> /data/rc/apps symlink, owned by root.
- name: Create symbolic links
ansible.builtin.file:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
owner: root
group: root
force: yes
state: link
loop:
- { src: /data/rc/apps, dest: /share/apps }
# Enables autofs at boot; no state is given, so the service is neither
# started nor restarted by this task.
- name: Enable autofs service
ansible.builtin.service:
name: autofs
enabled: true
# Pulls in autofs.yml only when use_autofs is true. The fully qualified
# module name matches the ansible.builtin.* style used by every other task.
- name: nfs_mounts using autofs
  ansible.builtin.include_tasks: autofs.yml
  when: use_autofs