Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • dwheel7/hpc-factory
  • rc/hpc-factory
  • louistw/hpc-factory
  • jpr/hpc-factory
  • krish94/hpc-factory
  • atlurie/hpc-factory
6 results
Show changes
Commits on Source (279)
Showing
with 788 additions and 337 deletions
...@@ -8,13 +8,12 @@ variables: ...@@ -8,13 +8,12 @@ variables:
ANSIBLE_REMOTE_TMP: "/tmp" ANSIBLE_REMOTE_TMP: "/tmp"
AWS_DEFAULT_REGION: "bhm" AWS_DEFAULT_REGION: "bhm"
AWS_HOST: "s3.lts.rc.uab.edu" AWS_HOST: "s3.lts.rc.uab.edu"
FF_SCRIPT_SECTIONS: "true"
OS_AUTH_TYPE: "v3applicationcredential" OS_AUTH_TYPE: "v3applicationcredential"
OS_AUTH_URL: "https://keystone.cloud.rc.uab.edu:5000/v3" OS_AUTH_URL: "https://keystone.cloud.rc.uab.edu:5000/v3"
OS_IDENTITY_API_VERSION: "3" OS_IDENTITY_API_VERSION: "3"
OS_INTERFACE: "public" OS_INTERFACE: "public"
OS_REGION_NAME: "bhm1" OS_REGION_NAME: "bhm1"
OOD_INSTANCE_NETWORK: "knightly-network" PROXY_NETWORK: "proxy-net"
PKR_VAR_flavor: "m1.medium-ruffner" PKR_VAR_flavor: "m1.medium-ruffner"
PKR_VAR_source_image: "CentOS-7-x86_64-GenericCloud-2009" PKR_VAR_source_image: "CentOS-7-x86_64-GenericCloud-2009"
PKR_VAR_floating_ip_network: "uab-campus" PKR_VAR_floating_ip_network: "uab-campus"
...@@ -22,26 +21,20 @@ variables: ...@@ -22,26 +21,20 @@ variables:
PKR_VAR_skip_create_image: "false" PKR_VAR_skip_create_image: "false"
PKR_VAR_ssh_username: "centos" PKR_VAR_ssh_username: "centos"
PKR_VAR_networks: '["8cf2f12e-905d-46d9-bc70-b0897c65f75a"]' PKR_VAR_networks: '["8cf2f12e-905d-46d9-bc70-b0897c65f75a"]'
PKR_VAR_image_membership: '["cf6fa1e53d4c40a49f4e0e469c440359"]'
GIT_AUTHOR_NAME: "Gitlab runner" GIT_AUTHOR_NAME: "Gitlab runner"
GIT_AUTHOR_EMAIL: "gitlab@runner" GIT_AUTHOR_EMAIL: "gitlab@runner"
NUM_SERVER_TO_KEEP: 1 INSTANCE_FLAVOR: "m1.medium-ruffner"
NUM_IMAGE_TO_KEEP: 30 HTTP_PROXY_INSTANCE_NAME: "http-proxy"
TIMESTAMP_REGEXP: '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{6}' SSH_PROXY_INSTANCE_NAME: "ssh-proxy"
PKR_VAR_root_ssh_key: "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBAFqqWgmYpEaGtHBeTu27ntVJpYjwq/x5aBefrvfhk8Z9lE3cuZ26vJ9n/9tGE4Zn2Pew1mpZgi6PzfJ3vMt8yA= root@master"
DEV_KEY: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCpncAcYosVHt7HsUcE2XOYDuCi4HQnmFJv279LOcpZgXtZ6o0BM1fe5FgJS0X1ohBXQUFRuYJuJSW/GSmC1K8T+wCrKjZLJdMbqrubHV27diUZfdoVkoJy1vcAQF5nEcoTC7MpAFbBomdn2rsrpgQe8DGiURV7+soqybXV1OsIR3FFf6npnUaskHYT/oVtG9eBOnscyBxoVgbxzlmyoBLXED/sHKFw4nQSF/glYKEFiDu6TRTsBBEGvv23Qo/66QpQiFJ6TNfApNiyY9L1X+Dy8EWU6lozmNgwGDjXQ70Lr6xHnA0QGVALJlHXa6QjpgtpC5Nefsdvtf1hpfFo2VutpbSB+aq9jk3gWNN+XkhrWN5PiwP7YYJNw/WozyfL+IhwjfHZGxkuws+wGR6ZKxlX9W9Vrsq9ncYNKuhy2SdsR6s2XECQtrEQ6ZlX5jRt6Yh5M9ls5fMsWEqknDPmr1Ui6wV7NxprYngo9fLSdYO/ETIO3S6PB0aEHOZOyGitGaM06EmNpvjQn/QkkaVgt/O8wKL1o1AVzXhDMAFvtG6ejppV6kuTUHXFgSGZF6N9fnP91HuytyzC09F+NMWcmnRdrgXlHapjuuL3zzi+XLCQvk8+aYTzBKx1nU2FPMDRZ9sInGmqdTuM002E7qVbaCy4OxcWaAS/L2UVhGnHr+egYw== louistw@uab.edu"
stages: stages:
- pre-build
- build - build
- test
- deploy - deploy
- cleanup
workflow: workflow:
rules: rules:
- if: $CI_PIPELINE_SOURCE == 'merge_request_event' - if: $CI_PIPELINE_SOURCE == "web"
- if: $CI_PIPELINE_SOURCE == 'schedule' - if: $CI_PIPELINE_SOURCE == "schedule"
.get_build_date: &get_build_date .get_build_date: &get_build_date
- export BUILD_DATE=$(TZ=America/Chicago date +%Y-%m-%dT%H%M%S) - export BUILD_DATE=$(TZ=America/Chicago date +%Y-%m-%dT%H%M%S)
...@@ -50,401 +43,386 @@ workflow: ...@@ -50,401 +43,386 @@ workflow:
.update_ansible_repo: &update_ansible_repo .update_ansible_repo: &update_ansible_repo
- *get_build_date - *get_build_date
- | - |
if [ ! -d $CI_PROJECT_DIR/CRI_XCBC ]; then export EXT_REPO_DIR=$(basename -s .git $EXT_PR_TARGET_REPO)
git clone https://github.com/uabrc/CRI_XCBC.git if [ ! -d $CI_PROJECT_DIR/$EXT_REPO_DIR ]; then
cd CRI_XCBC git clone ${EXT_PR_TARGET_REPO} ${EXT_REPO_DIR}
git remote add upstream https://github.com/jprorama/CRI_XCBC.git cd ${EXT_REPO_DIR}
git remote add upstream ${EXT_PR_SRC_REPO}
cd .. cd ..
fi fi
- cd CRI_XCBC - cd ${EXT_REPO_DIR}
- git config user.name "${GIT_AUTHOR_NAME}" - git config user.name "${GIT_AUTHOR_NAME}"
- git config user.email "${GIT_AUTHOR_EMAIL}" - git config user.email "${GIT_AUTHOR_EMAIL}"
- git fetch origin uab-prod - git checkout ${EXT_PR_TARGET_BRANCH}
- git fetch upstream dev - git fetch origin ${EXT_PR_TARGET_BRANCH}
- git checkout uab-prod - git merge origin/${EXT_PR_TARGET_BRANCH}
- git merge origin/uab-prod
- git checkout -b integration - git checkout -b integration
- git merge upstream/dev - git fetch upstream ${EXT_PR_SRC_BRANCH}
- export CRI_XCBC_HEAD=$(git rev-parse --short HEAD) - git merge upstream/${EXT_PR_SRC_BRANCH}
- export CRI_XCBC_dev=$(git rev-parse --short upstream/dev) # export vars into job artifacts
- export CRI_XCBC_prod=$(git rev-parse --short origin/uab-prod) - export EXT_REPO_HEAD=$(git rev-parse --short HEAD)
- export EXT_PR_SRC_BRANCH_SHA=$(git rev-parse --short upstream/${EXT_PR_SRC_BRANCH})
- export EXT_PR_TARGET_BRANCH_SHA=$(git rev-parse --short origin/${EXT_PR_TARGET_BRANCH})
- cd .. - cd ..
- export PACKER_IMAGE_HEAD=$(git rev-parse --short HEAD) - export PACKER_IMAGE_HEAD=$(git rev-parse --short HEAD)
- echo CRI_XCBC_HEAD=${CRI_XCBC_HEAD} | tee -a $CI_PROJECT_DIR/image.env - echo EXT_REPO_HEAD=${EXT_REPO_HEAD} | tee -a $CI_PROJECT_DIR/image.env
- echo CRI_XCBC_dev=${CRI_XCBC_dev} | tee -a $CI_PROJECT_DIR/image.env - echo EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} | tee -a $CI_PROJECT_DIR/image.env
- echo CRI_XCBC_prod=${CRI_XCBC_prod} | tee -a $CI_PROJECT_DIR/image.env - echo EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} | tee -a $CI_PROJECT_DIR/image.env
- echo PACKER_IMAGE_HEAD=${PACKER_IMAGE_HEAD} | tee -a $CI_PROJECT_DIR/image.env - echo PACKER_IMAGE_HEAD=${PACKER_IMAGE_HEAD} | tee -a $CI_PROJECT_DIR/image.env
.get_ansible_files: &get_ansible_files .get_ansible_files: &get_ansible_files
- s3cmd get --force -r --host=$AWS_HOST --host-bucket=$AWS_HOST s3://cheaha-cloud-ansible-files/ ansible/files/ - s3cmd get --force -r --host=$AWS_HOST --host-bucket=$AWS_HOST s3://cheaha-cloud-ansible-files/ ansible/files/
build_docker_image: .build_proxy_image_template: &build_proxy_image_template
image: docker:20.10.17
stage: pre-build
services:
- docker:20.10.16-dind
tags:
- dind
before_script:
- *get_build_date
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
script:
- docker build -t $CI_REGISTRY_IMAGE:$BUILD_DATE -t $CI_REGISTRY_IMAGE:latest .
- >
docker run --rm $CI_REGISTRY_IMAGE bash -c
'ansible --version &&
openstack --version &&
packer version &&
s3cmd --version &&
terraform --version'
- docker push --all-tags $CI_REGISTRY_IMAGE
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
changes:
- Dockerfile
allow_failure: true
build_base_image:
stage: build
tags:
- build
script: script:
- |
if [ -n "${BUILT_BASE_IMAGE_ID}" ]; then
exit 0
fi
- *update_ansible_repo - *update_ansible_repo
- *get_ansible_files - *get_ansible_files
- export REPO_HEAD=$(git rev-parse --short HEAD) # packer vars for job env
- export PKR_VAR_flavor="${BASE_BUILD_FLAVOR:-$PKR_VAR_flavor}" - export PKR_VAR_flavor="${PROXY_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- export PKR_VAR_build_instance_name="base-${REPO_HEAD}" - export PKR_VAR_build_instance_name="${BUILD_TARGET}-${EXT_REPO_HEAD}"
- export PKR_VAR_image_date_suffix=false - export PKR_VAR_image_date_suffix=false
- | - |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
export PKR_VAR_image_name="base-PR-${CI_MERGE_REQUEST_IID}" export PKR_VAR_image_name="${BUILD_TARGET}-PR-${CI_MERGE_REQUEST_IID}"
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
export PKR_VAR_image_name="base-${BUILD_DATE}" export PKR_VAR_image_name="${BUILD_TARGET}-${BUILD_TAG:-${BUILD_DATE}}"
fi
# Ansible var overrides
- |
if [ -n "${PROXY_ENABLE_VAR}" ]; then
sed -i -E "s/(${PROXY_ENABLE_VAR}: ).*/\1true/" $EXT_REPO_DIR/group_vars/all
fi fi
- packer init openstack - 'sed -i -E "s|(s3_endpoint: ).*|\1\"${S3_ENDPOINT}\"|" $EXT_REPO_DIR/group_vars/all'
- packer validate openstack - 'sed -i -E "s/(lts_access_key: ).*/\1\"${AWS_ACCESS_KEY_ID}\"/" $EXT_REPO_DIR/group_vars/all'
- packer build -machine-readable openstack | tee base_build.log - 'sed -i -E "s/(lts_secret_key: ).*/\1\"${AWS_SECRET_ACCESS_KEY}\"/" $EXT_REPO_DIR/group_vars/all'
- export BUILT_BASE_IMAGE_ID=$(grep 'Image:' base_build.log | awk '{print $4}') - 'sed -i -E "s/(s3_shibboleth_bucket_name: ).*/\1\"${S3_SHIBBOLETH_BUCKET_NAME}\"/" $EXT_REPO_DIR/group_vars/all'
- echo BUILT_BASE_IMAGE_ID=${BUILT_BASE_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env - 'sed -i -E "s/(s3_shibboleth_object_name: ).*/\1\"${S3_SHIBBOLETH_OBJECT_NAME}\"/" $EXT_REPO_DIR/group_vars/all'
- openstack image unset --property signature_verified $BUILT_BASE_IMAGE_ID - 'sed -i -E "s|(ssh_pub_key: ).*|\1\"{{ lookup(''file'', ''${SSH_PUB_KEY}'') }}\"|" $EXT_REPO_DIR/group_vars/all'
# packer commands
- packer init openstack-proxy
- packer validate openstack-proxy
- packer build -machine-readable openstack-proxy | tee proxy_build.log
- export BUILT_PROXY_IMAGE_ID=$(grep 'Image:' proxy_build.log | awk '{print $4}')
- echo BUILT_PROXY_IMAGE_ID=${BUILT_PROXY_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env
# set image properties with repo state
- openstack image set --property EXT_PR_SRC_REPO=${EXT_PR_SRC_REPO} --property EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} --property EXT_PR_TARGET_REPO=${EXT_PR_TARGET_REPO} --property EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} --property PACKER_IMAGE_HEAD=${PACKER_IMAGE_HEAD} ${BUILT_PROXY_IMAGE_ID}
artifacts: artifacts:
reports: reports:
dotenv: image.env dotenv: image.env
expire_in: 30 days
build_compute_image: build_http_proxy_image:
stage: build stage: build
needs: [build_base_image] environment:
name: build
tags: tags:
- build - build
script: variables:
- *update_ansible_repo PROXY_ENABLE_VAR: "enable_http_proxy"
- *get_ansible_files <<: *build_proxy_image_template
- export PKR_VAR_source_image=${BUILT_BASE_IMAGE_ID} rules:
- export REPO_HEAD=$(git rev-parse --short HEAD) - if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "http-proxy"
- export PKR_VAR_flavor="${COMPUTE_BUILD_FLAVOR:-$PKR_VAR_flavor}" when: always
- export PKR_VAR_build_instance_name="compute-${REPO_HEAD}"
- export PKR_VAR_image_date_suffix=false
- |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
export PKR_VAR_image_name="compute-PR-${CI_MERGE_REQUEST_IID}"
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
export PKR_VAR_image_name="compute-${BUILD_DATE}"
fi
- packer init openstack-compute
- packer validate openstack-compute
- packer build -machine-readable openstack-compute | tee compute_build.log
build_gpu_image: build_ssh_proxy_image:
stage: build stage: build
needs: [build_base_image] environment:
name: build
tags: tags:
- build - build
variables:
PROXY_ENABLE_VAR: "enable_ssh_proxy"
<<: *build_proxy_image_template
rules:
- if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "ssh-proxy"
when: always
.build_login_image_template: &build_login_image_template
script: script:
- *update_ansible_repo - *update_ansible_repo
- *get_ansible_files - *get_ansible_files
- FAILED=false # packer vars for job env
- export GPU_PLACEHOLDER_NAME="gpu1-placeholder" - export PKR_VAR_flavor="${PROXY_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- export GPU_PLACEHOLDER_FLAVOR="gpu1.medium" - export PKR_VAR_build_instance_name="${BUILD_TARGET}-${EXT_REPO_HEAD}"
- export GPU_PLACEHOLDER_IMAGE="CentOS-7-x86_64-GenericCloud-2009"
- export PKR_VAR_source_image=${BUILT_BASE_IMAGE_ID}
- export REPO_HEAD=$(git rev-parse --short HEAD)
- export PKR_VAR_flavor="${GPU_BUILD_FLAVOR:-gpu1.medium}"
- export PKR_VAR_build_instance_name="gpu-${REPO_HEAD}"
- export PKR_VAR_image_date_suffix=false - export PKR_VAR_image_date_suffix=false
- | - |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
export PKR_VAR_image_name="gpu-PR-${CI_MERGE_REQUEST_IID}" export PKR_VAR_image_name="${BUILD_TARGET}-PR-${CI_MERGE_REQUEST_IID}"
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
export PKR_VAR_image_name="gpu-${BUILD_DATE}" export PKR_VAR_image_name="${BUILD_TARGET}-${BUILD_TAG:-${BUILD_DATE}}"
fi
- packer init openstack-gpu
- packer validate openstack-gpu
- openstack server delete --wait $GPU_PLACEHOLDER_NAME
- packer build -machine-readable openstack-gpu | tee gpu_build.log || FAILED=true
- openstack server create --image $GPU_PLACEHOLDER_IMAGE --network cicd-net --flavor $GPU_PLACEHOLDER_FLAVOR $GPU_PLACEHOLDER_NAME
- |
if [ "$FAILED" = true ]; then
exit 1
fi fi
# packer commands
- packer init openstack-login
- packer validate openstack-login
- packer build -machine-readable openstack-login | tee login_build.log
- export BUILT_LOGIN_IMAGE_ID=$(grep 'Image:' login_build.log | awk '{print $4}')
- echo BUILT_LOGIN_IMAGE_ID=${BUILT_LOGIN_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env
# set image properties with repo state
- openstack image set --property EXT_PR_SRC_REPO=${EXT_PR_SRC_REPO} --property EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} --property EXT_PR_TARGET_REPO=${EXT_PR_TARGET_REPO} --property EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} --property PACKER_IMAGE_HEAD=${CI_COMMIT_SHORT_SHA} ${BUILT_LOGIN_IMAGE_ID}
artifacts:
reports:
dotenv: image.env
build_login_image:
stage: build
environment:
name: build
tags:
- build
<<: *build_login_image_template
rules: rules:
- if: $SKIP_GPU_BUILD == "true" - if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "login"
when: never when: always
- when: always
build_ood_image: build_ood_image:
stage: build stage: build
environment:
name: build
tags: tags:
- build - build
script: script:
- *update_ansible_repo - *update_ansible_repo
- *get_ansible_files - *get_ansible_files
- > # packer vars for job env
curl --header "PRIVATE-TOKEN: ${ANSIBLE_VAR_TOKEN}"
"${CI_API_V4_URL}/projects/2836/repository/files/knightly/raw?ref=main"
-o CRI_XCBC/group_vars/knightly
- 'sed -i -E "s/(lts_access_key: ).*/\1\"${AWS_ACCESS_KEY_ID}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s/(lts_secret_key: ).*/\1\"${AWS_SECRET_ACCESS_KEY}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s/(user_register_app_key: ).*/\1\"${SELF_REG_APP_KEY}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s/(celery_user_password: ).*/\1\"${CELERY_PASSWD}\"/" CRI_XCBC/group_vars/knightly'
- 'sed -i -E "s|(ssh_pub_key: ).*|\1\"{{ lookup(''file'', ''${SSH_PUB_KEY}'') }}\"|" CRI_XCBC/group_vars/knightly'
- export PKR_VAR_flavor="${OOD_BUILD_FLAVOR:-$PKR_VAR_flavor}" - export PKR_VAR_flavor="${OOD_BUILD_FLAVOR:-$PKR_VAR_flavor}"
- packer init openstack-ood - export PKR_VAR_build_instance_name="${BUILD_TARGET}-${EXT_REPO_HEAD}"
- packer validate openstack-ood - export PKR_VAR_image_date_suffix=false
- export PKR_VAR_image_name="${BUILD_TARGET}-${BUILD_TAG:-${BUILD_DATE}}"
- | - |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then if [ $ENV = 'knightly' ] || [ $ENV = 'prod' ]; then
export PKR_VAR_image_name="ood-PR-${CI_MERGE_REQUEST_IID}" curl --header "PRIVATE-TOKEN: ${ANSIBLE_VAR_TOKEN}" \
echo INSTANCE_FLAVOR="${PKR_VAR_flavor}" | tee -a $CI_PROJECT_DIR/image.env "${CI_API_V4_URL}/projects/2836/repository/files/$ENV/raw?ref=main" \
echo OOD_INSTANCE_NAME="ood-PR-${CI_MERGE_REQUEST_IID}" | tee -a $CI_PROJECT_DIR/image.env -o CRI_XCBC/group_vars/$ENV
export FLOATING_IP=$(openstack floating ip create uab-campus -f value -c floating_ip_address) sed -i -E "s/(lts_access_key: ).*/\1\"${AWS_ACCESS_KEY_ID}\"/" CRI_XCBC/group_vars/$ENV
echo FLOATING_IP=$FLOATING_IP | tee -a $CI_PROJECT_DIR/image.env sed -i -E "s/(lts_secret_key: ).*/\1\"${AWS_SECRET_ACCESS_KEY}\"/" CRI_XCBC/group_vars/$ENV
sed -i -E "s/(ood_servername: ).*/\1\"$CI_COMMIT_REF_SLUG.$FLOATING_IP.nip.io\"/" CRI_XCBC/group_vars/knightly sed -i -E "s/(user_register_app_key: ).*/\1\"${SELF_REG_APP_KEY}\"/" CRI_XCBC/group_vars/$ENV
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then sed -i -E "s/(celery_user_password: ).*/\1\"${CELERY_PASSWD}\"/" CRI_XCBC/group_vars/$ENV
export PKR_VAR_image_name="ood-${BUILD_DATE}" sed -i -E "s|(ssh_pub_key: ).*|\1\"{{ lookup('file', '${SSH_PUB_KEY}') }}\"|" CRI_XCBC/group_vars/$ENV
echo INSTANCE_FLAVOR="${OOD_INSTANCE_FLAVOR:-cpu16-64g}" | tee -a $CI_PROJECT_DIR/image.env
echo OOD_INSTANCE_NAME="ood-knightly" | tee -a $CI_PROJECT_DIR/image.env
echo FLOATING_IP=$TEST_IP | tee -a $CI_PROJECT_DIR/image.env
fi fi
- > # packer commands
PKR_VAR_build_instance_name="ood-${CRI_XCBC_HEAD}" - packer init openstack-ood
PKR_VAR_image_date_suffix=false - packer validate openstack-ood
packer build -machine-readable openstack-ood | tee ood_build.log - packer build -machine-readable openstack-ood | tee ood_build.log
- export BUILT_OOD_IMAGE_ID=$(grep 'Image:' ood_build.log | awk '{print $4}') - export BUILT_OOD_IMAGE_ID=$(grep 'Image:' ood_build.log | awk '{print $4}')
- echo BUILT_OOD_IMAGE_ID=${BUILT_OOD_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env - echo BUILT_OOD_IMAGE_ID=${BUILT_OOD_IMAGE_ID} | tee -a $CI_PROJECT_DIR/image.env
- openstack image set --property CRI_XCBC_prod=${CRI_XCBC_prod} --property CRI_XCBC_dev=${CRI_XCBC_dev} --property PACKER_IMAGE_HEAD=${PACKER_IMAGE_HEAD} ${BUILT_OOD_IMAGE_ID} # set image properties with repo state
- openstack image set --property EXT_PR_SRC_REPO=${EXT_PR_SRC_REPO} --property EXT_PR_SRC_BRANCH_SHA=${EXT_PR_SRC_BRANCH_SHA} --property EXT_PR_TARGET_REPO=${EXT_PR_TARGET_REPO} --property EXT_PR_TARGET_BRANCH_SHA=${EXT_PR_TARGET_BRANCH_SHA} --property PACKER_IMAGE_HEAD=${CI_COMMIT_SHORT_SHA} ${BUILT_OOD_IMAGE_ID}
artifacts: artifacts:
reports: reports:
dotenv: image.env dotenv: image.env
rules:
- if: $PIPELINE_TARGET == "build" && $BUILD_TARGET == "ood"
when: always
test_ood_image: deploy_http_proxy_node:
stage: test stage: deploy
needs: [build_ood_image]
environment: environment:
name: knightly name: $ENV
tags: tags:
- build - build
script: script:
- openstack image set --accept $BUILT_OOD_IMAGE_ID - openstack image set --accept $HTTP_PROXY_IMAGE_ID || true
- FAILED=false - FAILED=false
- | - |
eval $(ssh-agent -s) cat > user_data.txt <<EOF
chmod 400 "$SSH_PRIV_KEY"
ssh-add "$SSH_PRIV_KEY"
mkdir ~/.ssh
chmod 700 ~/.ssh
- OLD_INSTANCE_IP=$(openstack floating ip list --floating-ip-address $CHEAHA_IP -c "Fixed IP Address" -f value)
- echo $OLD_INSTANCE_IP
- |
if [ ! -z $OLD_INSTANCE_IP ]; then
export OLD_INSTANCE_ID=$(openstack server list --name $OOD_INSTANCE_NAME --ip $OLD_INSTANCE_IP -c ID -f value)
fi
- echo OLD_INSTANCE_ID=$OLD_INSTANCE_ID | tee -a instance.env
- |
cat > user_data.txt << OEOF
#!/bin/bash #!/bin/bash
cat > /etc/resolv.conf << EOF cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
search openstack.internal cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster [main]
nameserver 172.20.0.25 dns=none
EOF EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys echo "$DEV_KEY" >> /root/.ssh/authorized_keys
mkdir -p /run/shibboleth ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
chown shibd:shibd /run/shibboleth git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
pip3 install s3cmd cd /tmp/${CI_PROJECT_NAME}
s3cmd get --force -r --access_key=$AWS_ACCESS_KEY_ID --secret_key=$AWS_SECRET_ACCESS_KEY --host=$AWS_HOST --host-bucket=$AWS_HOST s3://knightly-key/ /etc/ssh/ git checkout ${CI_COMMIT_REF_NAME}
OEOF cat >> ansible/hosts<<EEOF
- > [$ENV]
export NEW_INSTANCE_ID=$(openstack server create 127.0.0.1
-c id -f value --image $BUILT_OOD_IMAGE_ID EEOF
--network $OOD_INSTANCE_NETWORK ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
--security-group ood-https-ports rm -rf /tmp/${CI_PROJECT_NAME}
--security-group node-exporter EOF
--security-group allow-ssh
--user-data user_data.txt
--flavor $INSTANCE_FLAVOR
--wait
$OOD_INSTANCE_NAME)
- echo NEW_INSTANCE_ID=$NEW_INSTANCE_ID | tee -a instance.env
- openstack server add floating ip $NEW_INSTANCE_ID $FLOATING_IP
- >
curl --retry 10 --retry-delay 20 --retry-connrefused https://knightly.rc.uab.edu/Shibboleth.sso/Metadata --resolve knightly.rc.uab.edu:443:$FLOATING_IP -kf
|| FAILED=true
- |
if [ $CI_PIPELINE_SOURCE == 'merge_request_event' ]; then
ssh -o StrictHostKeyChecking=no acctsvc@$FLOATING_IP '[ $(mount | grep "etc/auto" | wc -l) -eq 6 ]' || FAILED=true
elif [ $CI_PIPELINE_SOURCE == 'schedule' ]; then
cp "$SSH_KNOWN_HOSTS" ~/.ssh/known_hosts
chmod 644 ~/.ssh/known_hosts
ssh acctsvc@$FLOATING_IP '[ $(mount | grep "etc/auto" | wc -l) -eq 6 ]' || FAILED=true
fi
- | - |
if [ "$FAILED" = true ]; then export cmd="openstack server create"
if [ "${DELETE_WHEN_FAILED-true}" = true ]; then cmd+=" -c id -f value --image $HTTP_PROXY_IMAGE_ID"
openstack server delete $NEW_INSTANCE_ID cmd+=" --flavor $INSTANCE_FLAVOR"
openstack image delete $BUILT_OOD_IMAGE_ID for security_group in ${SECURITY_GROUP_LIST[@]};
fi do
false cmd+=" --security-group $security_group"
fi done
cmd+=" --user-data user_data.txt"
if [ -n "$PROXY_NETWORK" ];then cmd+=" --network $PROXY_NETWORK"; fi
if [ -n "$HTTP_PROXY_PORT" ];then cmd+=" --port $HTTP_PROXY_PORT"; fi
cmd+=" --wait $HTTP_PROXY_INSTANCE_NAME"
- export HTTP_PROXY_INSTANCE_ID=$(bash -c "$cmd")
- | - |
if [ $CI_PIPELINE_SOURCE = "schedule" ]; then # Associate the floating IP(s) with the HTTP Proxy instance
openstack server remove floating ip $NEW_INSTANCE_ID $FLOATING_IP for HTTP_PROXY_FLOATING_IP in ${HTTP_PROXY_FLOATING_IP_LIST[@]};
fi do
artifacts: echo "Associating FLOATING_IP $HTTP_PROXY_FLOATING_IP with HTTP_PROXY_INSTANCE_ID $HTTP_PROXY_INSTANCE_ID"
reports: openstack server add floating ip $HTTP_PROXY_INSTANCE_ID $HTTP_PROXY_FLOATING_IP
dotenv: instance.env done
deploy_review:
stage: deploy
script:
- echo "Deploy Review App"
environment:
name: review/$CI_COMMIT_REF_SLUG
url: https://$CI_COMMIT_REF_SLUG.$FLOATING_IP.nip.io
on_stop: stop_review
auto_stop_in: 2 days
tags:
- build
rules:
- if: $CI_MERGE_REQUEST_ID
stop_review:
stage: deploy
script:
- openstack server delete $NEW_INSTANCE_ID
- openstack image delete $BUILT_OOD_IMAGE_ID
- openstack floating ip delete $FLOATING_IP
environment:
name: review/$CI_COMMIT_REF_SLUG
action: stop
tags:
- build
rules: rules:
- if: $CI_MERGE_REQUEST_ID - if: $PIPELINE_TARGET == "deploy" && $HTTP_PROXY_IMAGE_ID
when: manual when: always
deploy_knightly: deploy_ssh_proxy_node:
stage: deploy stage: deploy
environment: environment:
name: knightly name: $ENV
tags: tags:
- build - build
script: script:
- openstack image set --accept $SSH_PROXY_IMAGE_ID || true
- FAILED=false
- | - |
if [ ! -z $OLD_INSTANCE_ID ]; then cat > user_data.txt <<EOF
openstack server remove floating ip $OLD_INSTANCE_ID $CAMPUS_IP #!/bin/bash
openstack server remove floating ip $OLD_INSTANCE_ID $CHEAHA_IP cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
fi [main]
dns=none
EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys
ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
cd /tmp/${CI_PROJECT_NAME}
git checkout ${CI_COMMIT_REF_NAME}
cat >> ansible/hosts<<EEOF
[$ENV]
127.0.0.1
EEOF
ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
rm -rf /tmp/${CI_PROJECT_NAME}
EOF
- | - |
if [ ! -z $NEW_INSTANCE_ID ]; then export cmd="openstack server create"
openstack server add floating ip $NEW_INSTANCE_ID $CAMPUS_IP cmd+=" -c id -f value --image $SSH_PROXY_IMAGE_ID"
openstack server add floating ip $NEW_INSTANCE_ID $CHEAHA_IP cmd+=" --flavor $INSTANCE_FLAVOR"
fi for security_group in ${SECURITY_GROUP_LIST[@]};
only: do
- schedules cmd+=" --security-group $security_group"
done
cmd+=" --user-data user_data.txt"
if [ -n "$PROXY_NETWORK" ];then cmd+=" --network $PROXY_NETWORK"; fi
if [ -n "$SSH_PROXY_PORT" ];then cmd+=" --port $SSH_PROXY_PORT"; fi
cmd+=" --wait $SSH_PROXY_INSTANCE_NAME"
- export SSH_PROXY_INSTANCE_ID=$(bash -c "$cmd")
- |
# Associate the floating IP(s) with the SSH Proxy instance
for SSH_PROXY_FLOATING_IP in ${SSH_PROXY_FLOATING_IP_LIST[@]};
do
echo "Associating FLOATING_IP $SSH_PROXY_FLOATING_IP with SSH_PROXY_INSTANCE_ID $SSH_PROXY_INSTANCE_ID"
openstack server add floating ip $SSH_PROXY_INSTANCE_ID $SSH_PROXY_FLOATING_IP
done
rules:
- if: $PIPELINE_TARGET == "deploy" && $SSH_PROXY_IMAGE_ID
when: always
deploy_cheaha: deploy_login_node:
stage: deploy stage: deploy
environment: environment:
name: cheaha name: $ENV
tags:
- build
script:
- echo "Job placeholder to deploy to Cheaha"
when: manual
only:
- main
cleanup_knightly:
stage: cleanup
tags: tags:
- build - build
script: script:
- OS_PROJECT_ID=$(openstack application credential show $OS_APPLICATION_CREDENTIAL_ID -f value -c project_id) - openstack image set --accept $LOGIN_IMAGE_ID || true
- > - FAILED=false
SERVER_TO_BE_DELETE=($(openstack server list --name $OOD_INSTANCE_NAME --sort-column Image --sort-descending -f value -c ID
| awk -v NSTK=$NUM_SERVER_TO_KEEP '{count++}
{if (count>NSTK) print}'))
- openstack image list --sort-column Name --sort-descending -f value -c Name -c ID --property owner=$OS_PROJECT_ID > images.txt
- >
OOD_IMAGE_TO_BE_DELETE=($(cat images.txt
| awk -v NITK=$NUM_IMAGE_TO_KEEP -v REGEX=ood-$TIMESTAMP_REGEX
'{if ($0 ~ REGEX) result[count++] = $1}
END {for(i=NITK;i<count;i++) print result[i]}'))
- >
BASE_IMAGE_TO_BE_DELETE=($(cat images.txt
| awk -v NITK=$NUM_IMAGE_TO_KEEP -v REGEX=base-$TIMESTAMP_REGEX
'{if ($0 ~ REGEX) result[count++] = $1}
END {for(i=NITK;i<count;i++) print result[i]}'))
- >
COMPUTE_IMAGE_TO_BE_DELETE=($(cat images.txt
| awk -v NITK=$NUM_IMAGE_TO_KEEP -v REGEX=compute-$TIMESTAMP_REGEX
'{if ($0 ~ REGEX) result[count++] = $1}
END {for(i=NITK;i<count;i++) print result[i]}'))
- >
GPU_IMAGE_TO_BE_DELETE=($(cat images.txt
| awk -v NITK=$NUM_IMAGE_TO_KEEP -v REGEX=gpu-$TIMESTAMP_REGEX
'{if ($0 ~ REGEX) result[count++] = $1}
END {for(i=NITK;i<count;i++) print result[i]}'))
- |
for svr in ${SERVER_TO_BE_DELETE[@]}; do
openstack server delete ${svr}
done
- |
for img in ${OOD_IMAGE_TO_BE_DELETE[@]}; do
openstack image delete ${img}
done
- | - |
for img in ${BASE_IMAGE_TO_BE_DELETE[@]}; do cat > user_data.txt <<EOF
openstack image delete ${img} #!/bin/bash
done cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
[main]
dns=none
EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys
ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
cd /tmp/${CI_PROJECT_NAME}
git checkout ${CI_COMMIT_REF_NAME}
cat >> ansible/hosts<<EEOF
[$ENV]
127.0.0.1
EEOF
s3cmd get --force -r --access_key=$AWS_ACCESS_KEY_ID --secret_key=$AWS_SECRET_ACCESS_KEY --host=$AWS_HOST --host-bucket=$AWS_HOST s3://cheaha-cloud-ansible-files/ /tmp/${CI_PROJECT_NAME}/ansible/files/
ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
rm -rf /tmp/${CI_PROJECT_NAME}
EOF
- | - |
for img in ${COMPUTE_IMAGE_TO_BE_DELETE[@]}; do export cmd="openstack server create"
openstack image delete ${img} cmd+=" -c id -f value --image $LOGIN_IMAGE_ID"
cmd+=" --flavor $INSTANCE_FLAVOR"
for security_group in ${SECURITY_GROUP_LIST[@]};
do
cmd+=" --security-group $security_group"
done done
cmd+=" --user-data user_data.txt"
if [ -n "$INSTANCE_NETWORK" ];then cmd+=" --network $INSTANCE_NETWORK"; fi
if [ -n "$LOGIN_PORT" ];then cmd+=" --port $LOGIN_PORT"; fi
cmd+=" --wait $LOGIN_INSTANCE_NAME"
- export LOGIN_INSTANCE_ID=$(bash -c "$cmd")
- | - |
for img in ${GPU_IMAGE_TO_BE_DELETE[@]}; do # Associate the floating IP(s) with the login instance
openstack image delete ${img} for LOGIN_FLOATING_IP in ${LOGIN_FLOATING_IP_LIST[@]};
do
echo "Associating FLOATING_IP $LOGIN_FLOATING_IP with LOGIN_INSTANCE_ID $LOGIN_INSTANCE_ID"
openstack server add floating ip $LOGIN_INSTANCE_ID $LOGIN_FLOATING_IP
done done
rules: rules:
- if: $CI_PIPELINE_SOURCE == "schedule" - if: $PIPELINE_TARGET == "deploy" && $LOGIN_IMAGE_ID
when: always when: always
cleanup_mr: deploy_ood_node:
stage: cleanup stage: deploy
environment:
name: $ENV
tags: tags:
- build - build
script: script:
- OS_PROJECT_ID=$(openstack application credential show $OS_APPLICATION_CREDENTIAL_ID -f value -c project_id) - openstack image set --accept $OOD_IMAGE_ID || true
- > - FAILED=false
IMAGE_TO_BE_DELETE=($(openstack image list --sort-column Name --sort-descending -f value -c Name -c ID --property owner=$OS_PROJECT_ID - |
| awk -v REGEX="(ood|base|compute|gpu)-PR-$CI_MERGE_REQUEST_IID" '{if ($0 ~ REGEX) print $1}')) cat > user_data.txt <<EOF
#!/bin/bash
cat >> /etc/NetworkManager/conf.d/90-dns-none.conf<<EEOF
[main]
dns=none
EEOF
systemctl reload NetworkManager
echo "$DEV_KEY" >> /root/.ssh/authorized_keys
ip route replace default via ${DEFAULT_GATEWAY_IP} dev eth0
git clone ${CI_REPOSITORY_URL} /tmp/${CI_PROJECT_NAME}
cd /tmp/${CI_PROJECT_NAME}
git checkout ${CI_COMMIT_REF_NAME}
cat >> ansible/hosts<<EEOF
[$ENV]
127.0.0.1
EEOF
s3cmd get --force -r --access_key=$AWS_ACCESS_KEY_ID --secret_key=$AWS_SECRET_ACCESS_KEY --host=$AWS_HOST --host-bucket=$AWS_HOST s3://cheaha-cloud-ansible-files/ /tmp/${CI_PROJECT_NAME}/ansible/files/
ansible-playbook -c local -i ansible/hosts --extra-vars="$EXTRA_VARS" ansible/cluster.yml | tee -a /tmp/ansible.log
rm -rf /tmp/${CI_PROJECT_NAME}
EOF
- |
export cmd="openstack server create"
cmd+=" -c id -f value --image $OOD_IMAGE_ID"
cmd+=" --flavor $INSTANCE_FLAVOR"
for security_group in ${SECURITY_GROUP_LIST[@]};
do
cmd+=" --security-group $security_group"
done
cmd+=" --user-data user_data.txt"
if [ -n "$INSTANCE_NETWORK" ];then cmd+=" --network $INSTANCE_NETWORK"; fi
if [ -n "$OOD_PORT" ];then cmd+=" --port $OOD_PORT"; fi
cmd+=" --wait $OOD_INSTANCE_NAME"
- export OOD_INSTANCE_ID=$(bash -c "$cmd")
- | - |
for img in ${IMAGE_TO_BE_DELETE[@]}; do # Associate the floating IP(s) with the OOD instance
openstack image delete ${img} for OOD_FLOATING_IP in ${OOD_FLOATING_IP_LIST[@]};
do
echo "Associating FLOATING_IP $OOD_FLOATING_IP with OOD_INSTANCE_ID $OOD_INSTANCE_ID"
openstack server add floating ip $OOD_INSTANCE_ID $OOD_FLOATING_IP
done done
rules: rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event" - if: $PIPELINE_TARGET == "deploy" && $OOD_IMAGE_ID
when: always when: always
...@@ -3,4 +3,5 @@ ...@@ -3,4 +3,5 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
---
# Playbook containing a single play that applies the virtual cheaha node
# roles to the "default" host pattern with privilege escalation.
# The play's keys are nested under the list entry so the document parses
# as a list holding one play.
- name: Setup node for use as a virtual cheaha node
  hosts: default
  become: true
  # Each role carries a tag equal to its own name so it can be selected
  # or skipped individually with --tags / --skip-tags.
  roles:
    - { name: 'cheaha.node', tags: 'cheaha.node' }
    - { name: 'nfs_mounts', tags: 'nfs_mounts' }
    - { name: 'ldap_config', tags: 'ldap_config' }
    - { name: 'slurm_client', tags: 'slurm_client' }
---
# Playbook containing a single play that applies the virtual cheaha node
# roles to every host in the inventory with privilege escalation.
# The play's keys are nested under the list entry so the document parses
# as a list holding one play.
- name: Setup node for use as a virtual cheaha node
  hosts: all
  become: true
  # Each role carries a tag equal to its own name. Entries with `when:`
  # are applied only if the named enable_* variable evaluates to true;
  # entries without it (cheaha.node, ldap_config, ssh_host_keys) always run.
  roles:
    - { name: 'cheaha.node', tags: 'cheaha.node' }
    - { name: 'nfs_mounts', tags: 'nfs_mounts', when: enable_nfs_mounts }
    - { name: 'ldap_config', tags: 'ldap_config' }
    - { name: 'slurm_client', tags: 'slurm_client', when: enable_slurm_client }
    - { name: 'ssh_host_keys', tags: 'ssh_host_keys' }
    - { name: 'ssh_proxy_config', tags: 'ssh_proxy_config', when: enable_ssh_proxy_config }
    - { name: 'ssl_cert', tags: 'ssl_cert', when: enable_ssl_certs }
    - { name: 'rsyslog_config', tags: 'rsyslog_config', when: enable_rsyslog_config }
    - { name: 'rewrite_map', tags: 'rewrite_map', when: enable_rewrite_map }
    - { name: 'fail2ban', tags: 'fail2ban', when: enable_fail2ban }
    - { name: 'install_node_exporter', tags: 'install_node_exporter', when: enable_node_exporter }
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
- { name: 'cuda_driver', tags: 'cuda_driver' } - { name: 'cuda_driver', tags: 'cuda_driver' }
- { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' } - { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' }
......
...@@ -4,9 +4,88 @@ ...@@ -4,9 +4,88 @@
yum_repo_files: [] yum_repo_files: []
pkg_list: [] pkg_list: []
slurm_version: 18.08.9 slurm_version: 18.08.9
enable_slurm_client: false
# NHC related # NHC related
nhc_download_url: "https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3-1.el7.noarch.rpm" nhc_download_url: "https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3-1.el7.noarch.rpm"
nhc_download_path: "/tmp" nhc_download_path: "/tmp"
nhc_git_repo: "https://gitlab.rc.uab.edu/rc/nhc.git" nhc_git_repo: "https://gitlab.rc.uab.edu/rc/nhc.git"
nhc_git_repo_path: "/tmp/nhc" nhc_git_repo_path: "/tmp/nhc"
# Empty by default; presumably overridden per environment -- confirm.
root_ssh_key: ''

# cheaha.node related
# Each entry is one line of the form "<ip> <name> [<alias> ...]".
hostname_lookup_table:
- '10.141.255.254 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver'
# Search domains and name servers; presumably consumed by the cheaha.node
# role for resolver configuration -- confirm against the role.
domain_search_list:
- 'openstack.internal'
- 'cm.cluster'
nameserver_list:
- '10.141.255.254'

# ldap_config related
ldap_cert_path: '/etc/openldap/certs'
ldap_uri: 'ldap://ldapserver'
# nfs_mounts related
# enable_nfs_mounts defaults to true; use_autofs and use_fstab both default
# to false (presumably they select how the mounts are configured -- confirm
# against the nfs_mounts role).
enable_nfs_mounts: true
use_autofs: false
use_fstab: false
# Plain mounts: source export, local path, mount options and directory mode.
mount_points:
- src: 'master:/gpfs4'
  path: '/gpfs4'
  opts: 'ro,sync,hard'
  mode: '0755'
- src: 'master:/gpfs5'
  path: '/gpfs5'
  opts: 'ro,sync,hard'
  mode: '0755'
# Autofs entries: same fields plus mount_point, map_name and key.
# NOTE(review): key '*' with '&' in src looks like an autofs wildcard map
# entry -- confirm against the role's map template.
autofs_mounts:
- src: 'master:/gpfs4/&'
  path: '/gpfs4'
  opts: 'fstype=nfs,vers=3,_netdev,default'
  mode: '0755'
  mount_point: '/gpfs4'
  map_name: 'gpfs4'
  key: '*'
- src: 'master:/gpfs5/&'
  path: '/gpfs5'
  opts: 'fstype=nfs,vers=3,_netdev,default'
  mode: '0755'
  mount_point: '/gpfs5'
  map_name: 'gpfs5'
  key: '*'
#SSH Host Keys
S3_ENDPOINT: ""
SSH_HOST_KEYS_S3_BUCKET: ""
SSH_HOST_KEYS_S3_OBJECT: ""
# AWS credentials
LTS_ACCESS_KEY: ""
LTS_SECRET_KEY: ""
# ssh proxy
enable_ssh_proxy_config: false
sshpiper_dest_dir: "/opt/sshpiper"
# rsyslog
enable_rsyslog_config: true
rsyslog_target: "*.* @master:514"
# ssl certs
enable_ssl_certs: false
ssl_cert_s3_bucket: ""
ssl_cert_key_location: "/etc/pki/tls/private"
ssl_cert_file_location: "/etc/pki/tls/certs"
ssl_cert_key: ""
ssl_cert_file: ""
ssl_cert_chain_file: ""
ssl_apache_config: ""
apache_service: "httpd"
# rewrite map
enable_rewrite_map: false
target_groups:
- {"name": "gpfs4", "host": "login001", "default": True }
- {"name": "gpfs5", "host": "login002", "default": False }
# account app
account_app_port: 8000
# fail2ban
enable_fail2ban: false
maxretry: 1
findtime: 600
bantime: 1200
fail2ban_white_list: "127.0.0.1/8"
# Node Exporter
enable_node_exporter: false
node_exporter_ver: "1.8.2"
node_exporter_filename: "node_exporter-{{ node_exporter_ver }}.linux-amd64"
node_exporter_user: node_exporter
node_exporter_group: node_exporter
node_exporter_port: 9100
---
# cheaha.node related
# Production overrides for the /etc/hosts entries, DNS search domains
# and name servers.
hostname_lookup_table:
  - "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02"
  - "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
  - "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
domain_search_list:
  - cm.cluster
  - rc.uab.edu
  - ib.cluster
  - drac.cluster
  - eth.cluster
  - ib-hdr.cluster
nameserver_list:
  - 172.20.0.25

# LDAP over TLS, with certs under the Bright openldap tree.
bright_openldap_path: "/cm/local/apps/openldap"
ldap_cert_path: "{{bright_openldap_path}}/etc/certs"
ldap_uri: "ldaps://ldapserver"

# proxy_config
# One entry per downstream login host; exactly one is flagged as default.
target_groups:
  - name: "gpfs5"
    host: "login002"
    default: false
    authorized_keys: "/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys"
    private_key: "/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"
  - name: "gpfs4"
    host: "login001"
    default: true
    authorized_keys: "/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys"
    private_key: "/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"
...@@ -3,9 +3,7 @@ ...@@ -3,9 +3,7 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
- { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' }
- { name: 'install_nhc', tags: 'install_nhc'} - { name: 'install_nhc', tags: 'install_nhc'}
- name: Setup node for use as a virtual cheaha node
ansible.builtin.import_playbook: cheaha.yml
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
- { name: 'install_zsh', tags: 'install_zsh' } - { name: 'install_zsh', tags: 'install_zsh' }
- name: Setup node for use as a virtual cheaha node
ansible.builtin.import_playbook: cheaha.yml
...@@ -4,15 +4,24 @@ ...@@ -4,15 +4,24 @@
path: /etc/hosts path: /etc/hosts
line: "{{ item }}" line: "{{ item }}"
loop: loop:
- "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02" "{{ hostname_lookup_table }}"
- "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
- "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
- name: Add proper DNS search to lookup other nodes on the cluster - name: Add proper DNS search to lookup other nodes on the cluster
ansible.builtin.lineinfile: ansible.builtin.lineinfile:
path: /etc/dhcp/dhclient.conf path: /etc/dhcp/dhclient.conf
insertbefore: BOF insertbefore: BOF
line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";' line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";'
create: true
state: present
- name: Template resolv.conf
ansible.builtin.template:
src: resolv.conf.j2
dest: /etc/resolv.conf
owner: root
group: root
mode: 0644
backup: true
- name: Disable SELinux - name: Disable SELinux
ansible.posix.selinux: ansible.posix.selinux:
...@@ -25,6 +34,7 @@ ...@@ -25,6 +34,7 @@
owner: root owner: root
group: root group: root
mode: 0644 mode: 0644
when: "'cm.repo' in yum_repo_files"
- name: Add ssh key for root access - name: Add ssh key for root access
ansible.posix.authorized_key: ansible.posix.authorized_key:
...@@ -35,3 +45,7 @@ ...@@ -35,3 +45,7 @@
- name: Set timezone to America/Chicago - name: Set timezone to America/Chicago
community.general.timezone: community.general.timezone:
name: America/Chicago name: America/Chicago
retries: 3
delay: 3
register: result
until: not result.failed
# Resolver configuration rendered from domain_search_list and nameserver_list.
# All search domains go on a single "search" line, space separated.
search {{ domain_search_list | join(' ') }}
# One "nameserver" line is emitted per entry in nameserver_list.
{% for name_server in nameserver_list %}
nameserver {{ name_server }}
{% endfor %}
---
# Install fail2ban with firewalld integration, render the jail/filter
# templates for sshd and sshpiperd, open the SSH ports, then restart
# firewalld and fail2ban so the new configuration is live.

- name: Install fail2ban
  ansible.builtin.package:
    name: "{{ item }}"
    state: present
  loop:
    - fail2ban
    - fail2ban-firewalld

- name: Configure fail2ban
  ansible.builtin.template:
    src: "{{ item.src }}"
    dest: "{{ item.dest }}"
    backup: true
  loop:
    - { src: 'jail.local.j2', dest: '/etc/fail2ban/jail.local' }
    - { src: 'sshpiperd_filter.local.j2', dest: '/etc/fail2ban/filter.d/sshpiperd.local' }
    - { src: 'sshpiperd_jail.local.j2', dest: '/etc/fail2ban/jail.d/sshpiperd.local' }

# The rename is a one-shot operation: once 00-firewalld.local exists the
# source file is gone and a second `mv` would fail. `creates` makes the
# task a no-op on re-runs while still failing loudly on a first run if
# the packaged 00-firewalld.conf is missing.
- name: Activate the firewalld support for fail2ban
  ansible.builtin.command:
    cmd: mv /etc/fail2ban/jail.d/00-firewalld.conf /etc/fail2ban/jail.d/00-firewalld.local
    creates: /etc/fail2ban/jail.d/00-firewalld.local

- name: Configure firewalld to allow ssh and sshpiper traffic
  ansible.posix.firewalld:
    port: "{{ item }}"
    zone: public
    state: enabled
    permanent: true
  loop:
    - 2222/tcp
    - 22/tcp

# Permanent firewalld rules only take effect after a reload/restart.
- name: Enable and start firewalld
  ansible.builtin.service:
    name: firewalld
    enabled: true
    state: restarted

- name: Enable and start fail2ban
  ansible.builtin.service:
    name: fail2ban
    enabled: true
    state: restarted
# Global fail2ban overrides applied to every jail.
[DEFAULT]
# Bans are enforced through firewalld.
banaction = firewalld
# Ban duration, taken from the bantime variable.
bantime = {{ bantime }}
# Addresses that are never banned, taken from fail2ban_white_list.
ignoreip = {{ fail2ban_white_list }}
# Turn on the stock sshd jail.
[sshd]
enabled = true
# fail2ban filter for sshpiperd log lines.
# Refer to https://github.com/fail2ban/fail2ban/wiki/Developing-Regex-in-Fail2ban for developing regex using fail2ban
#
[INCLUDES]
before = common.conf
[DEFAULT]
_daemon = sshpiperd
# ISO-8601 timestamp with either a numeric UTC offset or a trailing Z.
__iso_datetime = "\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:\d{2}|Z)"
# Structured-log prefix: time=<timestamp> level=debug|error
__pref = time=%(__iso_datetime)s level=(?:debug|error)
[Definition]
# Define the prefix regex for the log lines
prefregex = ^<F-MLFID>%(__prefix_line)s%(__pref)s</F-MLFID>\s+<F-CONTENT>.+</F-CONTENT>$
# Failregex to match the specific failure log lines (prefregex is automatically included)
# NOTE(review): the wording "too auth many failures" presumably mirrors the
# daemon's log message verbatim - confirm against real logs before "fixing" it.
failregex = ^msg="connection from .*failtoban: ip <HOST> too auth many failures"$
ignoreregex =
mode = normal
# Each failure is matched within a single log line.
maxlines = 1
# This configuration will block the remote host after {{maxretry}} failed SSH login attempts.
[sshpiperd]
enabled = true
# Uses the filter named sshpiperd.
filter = sshpiperd
# Log file scanned for matching lines.
logpath = /var/log/messages
# NOTE(review): the fail2ban tasks also open 2222/tcp in firewalld - confirm
# that 22 is the port sshpiperd actually listens on.
port = 22
# Failures allowed within findtime seconds before the host is banned.
maxretry = {{ maxretry }}
backend = auto
findtime = {{ findtime }}
---
# Point the stock CentOS yum repositories at vault.centos.org by
# commenting out the mirrorlist entries and enabling a vault baseurl.

# Use the find module instead of `ls` in a shell: it never reports a
# spurious change, returns an empty list (rather than failing) when no
# repo file exists, and the *.repo pattern keeps the timestamped backup
# copies written by `backup: true` out of later runs.
- name: Get CentOS repo files
  ansible.builtin.find:
    paths: /etc/yum.repos.d
    patterns: 'CentOS-*.repo'
  register: repo_files

- name: Remove mirrorlist from CentOS repo files
  ansible.builtin.replace:
    path: "{{ item.path }}"
    regexp: '^mirrorlist'
    replace: '#mirrorlist'
    backup: true
  loop: "{{ repo_files.files }}"
  loop_control:
    label: "{{ item.path }}"

- name: Use vault baseurl to CentOS repo files
  ansible.builtin.replace:
    path: "{{ item.path }}"
    regexp: '^#baseurl=http://mirror.centos.org'
    replace: 'baseurl=http://vault.centos.org'
    backup: true
  loop: "{{ repo_files.files }}"
  loop_control:
    label: "{{ item.path }}"
---
# Install Prometheus node_exporter from the upstream release tarball,
# run it as a dedicated system account under systemd, and open its port
# in firewalld when firewalld is present and running.

- name: Download node_exporter binary
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_ver }}/{{ node_exporter_filename }}.tar.gz"
    dest: "/tmp/{{ node_exporter_filename }}.tar.gz"

- name: Extract node_exporter
  ansible.builtin.unarchive:
    src: "/tmp/{{ node_exporter_filename }}.tar.gz"
    dest: "/tmp"
    remote_src: true

- name: Create system group for user account {{ node_exporter_group }}
  ansible.builtin.group:
    name: "{{ node_exporter_group }}"
    system: true
    state: present

- name: Create system user account {{ node_exporter_user }}
  ansible.builtin.user:
    name: "{{ node_exporter_user }}"
    comment: Prometheus node_exporter system account
    group: "{{ node_exporter_group }}"
    system: true
    home: /var/lib/node_exporter
    create_home: false
    shell: /sbin/nologin
    state: present

- name: Copy node_exporter binary
  ansible.builtin.copy:
    src: "/tmp/{{ node_exporter_filename }}/node_exporter"
    dest: /usr/local/bin/node_exporter
    remote_src: true
    owner: root
    group: root
    mode: "0755"

- name: Copy systemd unit file
  ansible.builtin.template:
    src: node_exporter.service.j2
    dest: /etc/systemd/system/node_exporter.service
    owner: root
    group: root
    mode: '0644'

# Remove both the downloaded archive and the extracted directory.
- name: Clean up /tmp
  ansible.builtin.file:
    path: "/tmp/{{ item }}"
    state: absent
  loop:
    - "{{ node_exporter_filename }}.tar.gz"
    - "{{ node_exporter_filename }}"

# daemon_reload picks up the unit file written above.
- name: Restart node_exporter service
  ansible.builtin.systemd:
    daemon_reload: true
    name: node_exporter
    state: restarted
    enabled: true

# Service facts feed the firewalld conditions on the tasks below.
- name: Collect facts about system services
  ansible.builtin.service_facts:

- name: Configure firewalld to allow prometheus
  ansible.posix.firewalld:
    port: "{{ node_exporter_port }}/tcp"
    zone: public
    state: enabled
    permanent: true
  when:
    - "'firewalld.service' in ansible_facts.services"
    - ansible_facts.services["firewalld.service"].state == "running"

- name: Enable and start firewalld
  ansible.builtin.service:
    name: firewalld
    enabled: true
    state: restarted
  when:
    - "'firewalld.service' in ansible_facts.services"
    - ansible_facts.services["firewalld.service"].state == "running"
# systemd unit for Prometheus node_exporter.
[Unit]
Description=Node Exporter
After=network.target
[Service]
# Run under the account named by node_exporter_user / node_exporter_group.
User={{ node_exporter_user }}
Group={{ node_exporter_group }}
Type=simple
# Listen on node_exporter_port on all addresses; the two exclude regexes
# drop the listed mount points and filesystem types from the filesystem collector.
ExecStart=/usr/local/bin/node_exporter --web.listen-address=:{{ node_exporter_port }} --collector.filesystem.mount-points-exclude "^/(dev|proc|run/user/.+|run/credentials/.+|sys|var/lib/docker/.+)($|/)" --collector.filesystem.fs-types-exclude "^(autofs|binfmt_misc|bpf|cgroup|tmpfs|sunrpc|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
[Install]
WantedBy=multi-user.target
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
- nss-pam-ldapd - nss-pam-ldapd
- openldap - openldap
- openldap-clients - openldap-clients
- openldap-servers
- sssd-ldap - sssd-ldap
- name: Update nsswitch.conf to look for ldap - name: Update nsswitch.conf to look for ldap
...@@ -25,7 +24,7 @@ ...@@ -25,7 +24,7 @@
- name: Copy ldap cert(s) into place - name: Copy ldap cert(s) into place
ansible.builtin.copy: ansible.builtin.copy:
src: "{{ item.src }}" src: "{{ item.src }}"
dest: "/cm/local/apps/openldap/etc/certs/{{ item.src }}" dest: "{{ ldap_cert_path }}/{{ item.src }}"
owner: ldap owner: ldap
group: ldap group: ldap
mode: 0440 mode: 0440
...@@ -33,10 +32,11 @@ ...@@ -33,10 +32,11 @@
- { src: ca.pem } - { src: ca.pem }
- { src: ldap.key } - { src: ldap.key }
- { src: ldap.pem } - { src: ldap.pem }
when: ldap_uri | regex_search('^ldaps://')
- name: Copy ldap config into place - name: Copy ldap config into place
ansible.builtin.copy: ansible.builtin.template:
src: nslcd.conf src: nslcd.conf.j2
dest: /etc/nslcd.conf dest: /etc/nslcd.conf
owner: root owner: root
group: root group: root
...@@ -46,5 +46,6 @@ ...@@ -46,5 +46,6 @@
ansible.builtin.service: ansible.builtin.service:
name: "{{ item }}" name: "{{ item }}"
enabled: yes enabled: yes
state: restarted
loop: loop:
- nslcd - nslcd
# This is the configuration file for the LDAP nameservice
# switch library's nslcd daemon. It configures the mapping
# between NSS names (see /etc/nsswitch.conf) and LDAP
# information in the directory.
# See the manual page nslcd.conf(5) for more information.
# The user and group nslcd should run as.
uid nslcd
gid ldap
# The uri pointing to the LDAP server to use for name lookups.
# Multiple entries may be specified. The address that is used
# here should be resolvable without using LDAP (obviously).
#uri ldap://127.0.0.1/
#uri ldaps://127.0.0.1/
#uri ldapi://%2fvar%2frun%2fldapi_sock/
# Note: %2f encodes the '/' used as directory separator
uri {{ ldap_uri }}
# The LDAP version to use (defaults to 3
# if supported by client library)
#ldap_version 3
# The distinguished name of the search base.
base dc=cm,dc=cluster
# The distinguished name to bind to the server with.
# Optional: default is to bind anonymously.
#binddn cn=proxyuser,dc=example,dc=com
# The credentials to bind with.
# Optional: default is no credentials.
# Note that if you set a bindpw you should check the permissions of this file.
#bindpw secret
# The distinguished name to perform password modifications by root by.
#rootpwmoddn cn=admin,dc=example,dc=com
# The default search scope.
#scope sub
#scope one
#scope base
# Customize certain database lookups.
#base group ou=Groups,dc=example,dc=com
#base passwd ou=People,dc=example,dc=com
#base shadow ou=People,dc=example,dc=com
#scope group onelevel
#scope hosts sub
# Bind/connect timelimit.
#bind_timelimit 30
# Search timelimit.
#timelimit 30
# Idle timelimit. nslcd will close connections if the
# server has not been contacted for the number of seconds.
idle_timelimit 240
# Use StartTLS without verifying the server certificate.
#ssl start_tls
#tls_reqcert never
{% if ldap_uri | regex_search('^ldaps://') %}
# TLS settings are emitted only when ldap_uri uses the ldaps:// scheme.
ssl on
tls_reqcert demand
# CA certificates for server certificate verification
#tls_cacertdir /etc/ssl/certs
# Paths follow ldap_cert_path so they match where the certificates are installed.
tls_cacertfile {{ ldap_cert_path }}/ca.pem
tls_cert {{ ldap_cert_path }}/ldap.pem
tls_key {{ ldap_cert_path }}/ldap.key
{% endif %}
# Seed the PRNG if /dev/urandom is not provided
#tls_randfile /var/run/egd-pool
# SSL cipher suite
# See man ciphers for syntax
#tls_ciphers TLSv1
# Client certificate and key
# Use these, if your server requires client authentication.
# Mappings for Services for UNIX 3.5
#filter passwd (objectClass=User)
#map passwd uid msSFU30Name
#map passwd userPassword msSFU30Password
#map passwd homeDirectory msSFU30HomeDirectory
#map passwd homeDirectory msSFUHomeDirectory
#filter shadow (objectClass=User)
#map shadow uid msSFU30Name
#map shadow userPassword msSFU30Password
#filter group (objectClass=Group)
#map group member msSFU30PosixMember
# Mappings for Services for UNIX 2.0
#filter passwd (objectClass=User)
#map passwd uid msSFUName
#map passwd userPassword msSFUPassword
#map passwd homeDirectory msSFUHomeDirectory
#map passwd gecos msSFUName
#filter shadow (objectClass=User)
#map shadow uid msSFUName
#map shadow userPassword msSFUPassword
#map shadow shadowLastChange pwdLastSet
#filter group (objectClass=Group)
#map group member posixMember
# Mappings for Active Directory
#pagesize 1000
#referrals off
#idle_timelimit 800
#filter passwd (&(objectClass=user)(!(objectClass=computer))(uidNumber=*)(unixHomeDirectory=*))
#map passwd uid sAMAccountName
#map passwd homeDirectory unixHomeDirectory
#map passwd gecos displayName
#filter shadow (&(objectClass=user)(!(objectClass=computer))(uidNumber=*)(unixHomeDirectory=*))
#map shadow uid sAMAccountName
#map shadow shadowLastChange pwdLastSet
#filter group (objectClass=group)
# Alternative mappings for Active Directory
# (replace the SIDs in the objectSid mappings with the value for your domain)
#pagesize 1000
#referrals off
#idle_timelimit 800
#filter passwd (&(objectClass=user)(objectClass=person)(!(objectClass=computer)))
#map passwd uid cn
#map passwd uidNumber objectSid:S-1-5-21-3623811015-3361044348-30300820
#map passwd gidNumber objectSid:S-1-5-21-3623811015-3361044348-30300820
#map passwd homeDirectory "/home/$cn"
#map passwd gecos displayName
#map passwd loginShell "/bin/bash"
#filter group (|(objectClass=group)(objectClass=person))
#map group gidNumber objectSid:S-1-5-21-3623811015-3361044348-30300820
# Mappings for AIX SecureWay
#filter passwd (objectClass=aixAccount)
#map passwd uid userName
#map passwd userPassword passwordChar
#map passwd uidNumber uid
#map passwd gidNumber gid
#filter group (objectClass=aixAccessGroup)
#map group cn groupName
#map group gidNumber gid
# This comment prevents repeated auto-migration of settings.