Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • rc/hpc-factory
  • louistw/hpc-factory
  • jpr/hpc-factory
  • krish94/hpc-factory
  • atlurie/hpc-factory
  • dwheel7/hpc-factory
  • jpr/vm-factory
  • rc/vm-factory
8 results
Show changes
Commits on Source (612)
Showing
with 3693 additions and 3101 deletions
This diff is collapsed.
# Build-tooling image: a slim Python base with Packer, Terraform, s3cmd,
# Ansible and the OpenStack command-line client installed at pinned versions.
FROM python:3.8-slim

# Pinned tool versions, referenced by the install steps below.
ENV S3CMD_VER=2.3.0 \
    ANSIBLE_VER=4.10.0 \
    OSC_VER=5.8.0 \
    TF_VER=1.4.6 \
    PACKER_VER=1.9.4

# OS packages; the apt package index is removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
        ssh \
        curl \
        wget \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Packer: download the release archive, unpack it into /usr/local/bin, delete the archive.
RUN wget "https://releases.hashicorp.com/packer/${PACKER_VER}/packer_${PACKER_VER}_linux_amd64.zip" \
    && unzip "packer_${PACKER_VER}_linux_amd64.zip" -d /usr/local/bin \
    && rm "packer_${PACKER_VER}_linux_amd64.zip"

# Terraform: same procedure as Packer.
RUN wget "https://releases.hashicorp.com/terraform/${TF_VER}/terraform_${TF_VER}_linux_amd64.zip" \
    && unzip "terraform_${TF_VER}_linux_amd64.zip" -d /usr/local/bin \
    && rm "terraform_${TF_VER}_linux_amd64.zip"

# Python tooling: upgrade pip first, then install the pinned packages.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir \
        "s3cmd==${S3CMD_VER}" \
        "ansible==${ANSIBLE_VER}" \
        "python-openstackclient==${OSC_VER}"
[defaults]
# Select the default stdout callback; only one 'stdout' type callback can be enabled at a time.
#stdout_callback = skippy
stdout_callback = yaml
## Ansible ships with some plugins that must be explicitly enabled;
## this avoids running every plugin of a given type by default.
## This setting lists the plugins you want enabled for your system.
## Custom plugins should not need this unless the plugin author specifies it.
# Enable additional callback plugins; they can output to stdout but cannot be 'stdout' type.
# NOTE(review): 'debug' and 'minimal' look like stdout-type callbacks - confirm they have any effect here.
callbacks_enabled = timer, debug, profile_roles, profile_tasks, minimal
# Force colored output
force_color = true
---
# Single play: applies the repo-fix and package-install roles to the
# "default" host group with privilege escalation.
- name: Install base packages
  hosts: default
  become: true
  roles:
    - name: fix_centos_repo
      tags: fix_centos_repo
    - name: install_packages
      tags: install_packages
This diff is collapsed.
---
# Single play run against all hosts. Roles carrying a "when" are only applied
# when the corresponding enable_* variable evaluates to true.
- name: Setup node for use as a virtual cheaha node
  hosts: all
  become: true
  roles:
    - name: cheaha.node
      tags: cheaha.node
    - name: nfs_mounts
      tags: nfs_mounts
      when: enable_nfs_mounts
    - name: ldap_config
      tags: ldap_config
    - name: slurm_client
      tags: slurm_client
      when: enable_slurm_client
    - name: ssh_host_keys
      tags: ssh_host_keys
    - name: ssh_proxy_config
      tags: ssh_proxy_config
      when: enable_ssh_proxy_config
    - name: ssl_cert
      tags: ssl_cert
      when: enable_ssl_certs
    - name: rsyslog_config
      tags: rsyslog_config
      when: enable_rsyslog_config
    - name: rewrite_map
      tags: rewrite_map
      when: enable_rewrite_map
    - name: fail2ban
      tags: fail2ban
      when: enable_fail2ban
    - name: install_node_exporter
      tags: install_node_exporter
      when: enable_node_exporter
#!/bin/bash
# Build a zsh binary RPM from the Fedora dist-git packaging.
#
# Steps:
#   1. Create the rpmbuild directory layout in the current directory.
#   2. Install the fetch/unpack prerequisites if any of them is missing.
#   3. Fetch the Fedora packaging repo and copy the spec and auxiliary sources.
#   4. Install rpm-build and the packages named by the spec's
#      BuildRequires/Requires lines if any of them is missing.
#   5. Download the upstream tarball and its signature if not already present.
#   6. Verify the downloads against the checksums in the packaging repo.
#   7. Run rpmbuild with the current directory as _topdir.
#
# Exits with status 1 if the packaging repo cannot be cloned or the checksum
# verification fails.
name=zsh
version=5.9

mkdir -p BUILD RPMS SOURCES SPECS SRPMS

# Only call yum when at least one prerequisite is not installed.
prereqs=(git epel-release bzip2 xz gzip tar)
if ! rpm -q "${prereqs[@]}"; then
    sudo yum -y install "${prereqs[@]}"
fi

# Re-use an existing checkout so the script can be re-run in the same directory;
# a plain "git clone" fails when the target directory already exists.
if [ ! -d "${name}/.git" ]; then
    if ! git clone "https://src.fedoraproject.org/rpms/${name}.git"; then
        echo "Failed to clone the ${name} packaging repository" >&2
        exit 1
    fi
fi

cp "${name}/${name}.spec" SPECS/
cp "${name}"/*.rhs SOURCES/
cp "${name}"/dot* SOURCES/

# Comment out the glibc-langpack-ja build dependency.
# Perl may not be available, so sed is used instead of:
#perl -pi -e 's/^(BuildRequires: glibc-langpack-ja)/#$1/g;' ${name}/${name}.spec
sed -i '/BuildRequires: glibc-langpack-ja/s/^/#/' "SPECS/${name}.spec"

# rpm-build plus the first package named on every BuildRequires/Requires line.
# Lines without a second field are skipped so no empty package name is produced.
mapfile -t spec_pkgs < <(grep -E '^BuildRequires|^Requires' "SPECS/${name}.spec" | awk 'NF >= 2 {print $2}')
pkgs=(rpm-build "${spec_pkgs[@]}")
echo "${pkgs[*]}"
if ! rpm -q "${pkgs[@]}"; then
    sudo yum install -y "${pkgs[@]}"
fi

# Download the upstream tarball and its detached signature unless already present.
tarball="${name}-${version}.tar.xz"
base_url="https://downloads.sourceforge.net/${name}"
for src_file in "${tarball}" "${tarball}.asc"; do
    if [ ! -f "SOURCES/${src_file}" ]; then
        # -f makes curl fail on an HTTP error instead of saving the error page
        # under the source file name. A failed download is only warned about
        # here; the checksum verification below decides whether to abort.
        if ! curl -fL -o "SOURCES/${src_file}" "${base_url}/${src_file}"; then
            echo "Warning: could not download ${base_url}/${src_file}" >&2
        fi
    fi
done

# Verify the sources against the checksums recorded in the packaging repo.
# The subshell keeps the directory change local to the check.
if ! (cd SOURCES && sha512sum -c "../${name}/sources"); then
    echo "Checksum verification against ${name}/sources failed" >&2
    exit 1
fi

rpmbuild --define "_topdir $(pwd)" -bb "SPECS/${name}.spec"
[nux-dextop] [nux-dextop]
name=Nux.Ro RPMs for general desktop use name=Nux.Ro RPMs for general desktop use
baseurl=http://li.nux.ro/download/nux/dextop/el7/$basearch/ http://mirror.li.nux.ro/li.nux.ro/nux/dextop/el7/$basearch/ baseurl=http://li.nux.ro/download/nux/dextop/el7/$basearch/
enabled=1 enabled=1
gpgcheck=1 gpgcheck=1
gpgkey=http://li.nux.ro/download/nux/RPM-GPG-KEY-nux.ro gpgkey=http://li.nux.ro/download/nux/RPM-GPG-KEY-nux.ro
......
---
# First play: applies the repo-fix, package, CUDA driver, pam_slurm_adopt and
# NHC roles to the "default" host group.
- name: Setup node for use as a cluster host with gpu drivers/pkgs
  hosts: default
  become: true
  roles:
    - name: fix_centos_repo
      tags: fix_centos_repo
    - name: install_packages
      tags: install_packages
    - name: cuda_driver
      tags: cuda_driver
    - name: pam_slurm_adopt
      tags: pam_slurm_adopt
    - name: install_nhc
      tags: install_nhc

# Second entry: pulls in the plays defined in cheaha.yml.
- name: Setup node for use as a virtual cheaha node
  ansible.builtin.import_playbook: cheaha.yml
--- ---
#This file path is relative to the ansible playbook. zsh_ver: 5.7.1
pkg_list_file: "cheaha-compute-yum-pkg-list.txt" zsh_src_url: "https://www.zsh.org/pub/old/zsh-{{ zsh_ver }}.tar.xz"
yum_repo_files: []
pkg_list: []
slurm_version: 18.08.9
enable_slurm_client: false
lmod_db_host_machine: "ohpc" # NHC related
driver_run_file_link: "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run" nhc_download_url: "https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3-1.el7.noarch.rpm"
# NHC download/checkout locations
nhc_download_path: "/tmp"
nhc_git_repo: "https://gitlab.rc.uab.edu/rc/nhc.git"
nhc_git_repo_path: "/tmp/nhc"

root_ssh_key: ""

# cheaha.node related
hostname_lookup_table:
  - "10.141.255.254 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
domain_search_list:
  - openstack.internal
  - cm.cluster
nameserver_list:
  - 10.141.255.254

# ldap_config related
ldap_cert_path: "/etc/openldap/certs"
ldap_uri: "ldap://ldapserver"

# nfs_mounts related
enable_nfs_mounts: true
use_autofs: false
use_fstab: false
mount_points:
  - src: "master:/gpfs4"
    path: "/gpfs4"
    opts: "ro,sync,hard"
    mode: "0755"
  - src: "master:/gpfs5"
    path: "/gpfs5"
    opts: "ro,sync,hard"
    mode: "0755"
autofs_mounts:
  - src: "master:/gpfs4/&"
    path: "/gpfs4"
    opts: "fstype=nfs,vers=3,_netdev,default"
    mode: "0755"
    mount_point: "/gpfs4"
    map_name: "gpfs4"
    key: "*"
  - src: "master:/gpfs5/&"
    path: "/gpfs5"
    opts: "fstype=nfs,vers=3,_netdev,default"
    mode: "0755"
    mount_point: "/gpfs5"
    map_name: "gpfs5"
    key: "*"

# SSH host keys
S3_ENDPOINT: ""
SSH_HOST_KEYS_S3_BUCKET: ""
SSH_HOST_KEYS_S3_OBJECT: ""

# AWS credentials
LTS_ACCESS_KEY: ""
LTS_SECRET_KEY: ""

# ssh proxy
enable_ssh_proxy_config: false
sshpiper_dest_dir: "/opt/sshpiper"

# rsyslog
enable_rsyslog_config: true
rsyslog_target: "*.* @master:514"

# ssl certs
enable_ssl_certs: false
ssl_cert_s3_bucket: ""
ssl_cert_key_location: "/etc/pki/tls/private"
ssl_cert_file_location: "/etc/pki/tls/certs"
ssl_cert_key: ""
ssl_cert_file: ""
ssl_cert_chain_file: ""
ssl_apache_config: ""
apache_service: "httpd"

# rewrite map
enable_rewrite_map: false
target_groups:
  - name: "gpfs4"
    host: "login001"
    default: true
  - name: "gpfs5"
    host: "login002"
    default: false

# account app
account_app_port: 8000

# fail2ban
enable_fail2ban: false
maxretry: 1
findtime: 600
bantime: 1200
fail2ban_white_list: "127.0.0.1/8"

# Node Exporter
enable_node_exporter: false
node_exporter_ver: "1.8.2"
node_exporter_filename: "node_exporter-{{ node_exporter_ver }}.linux-amd64"
node_exporter_user: node_exporter
node_exporter_group: node_exporter
node_exporter_port: 9100

# CentOS Repo
centos_base_url: "http://vault.centos.org"
This diff is collapsed.
---
# Yum repo definition files for this target.
yum_repo_files:
  - TurboVNC.repo
  - cm.repo

# Packages for this target; entries carrying a version suffix are pinned.
pkg_list:
  - Lmod-7.8.11
  - atftp-server
  - cluster-tools-dell
  - cluster-tools-slave
  - cm-boost
  - cm-config-ceph-release-luminous
  - cm-config-cm
  - cm-config-dhclient
  - cm-config-dracut-slave
  - cm-config-grub
  - cm-config-ldap-client
  - cm-config-limits
  - cm-config-man
  - cm-config-named
  - cm-config-network-slave
  - cm-config-nfsclient
  - cm-config-rootfiles-slave
  - cm-config-selinux
  - cm-config-ssh-slave
  - cm-config-sysctl-slave
  - cm-config-syslog-slave
  - cm-config-systemd
  - cm-config-xntp-slave
  - cm-config-yum
  - cm-curl
  - cm-dhcp
  - cm-freeipmi
  - cm-ipmitool
  - cm-ipxe-slave
  - cm-libpam
  - cm-libprometheus
  - cm-lua
  - cm-mariadb-libs
  - cm-openssl
  - cm-python2
  - cm-python36
  - cm-slave
  - cm-uge-client
  - cmburn
  - cmburn-slave
  - cmdaemon
  - cmdaemon-remotecm
  - confuse
  - gcc-recent
  - gdb-recent
  - lshw
  - lua-bit32
  - lua-filesystem
  - lua-json
  - lua-lpeg
  - lua-posix
  - lua-term
  - 'mysql++'
  - net-snmp-recent
  - node-installer-slave
  - openvpn
  - perl-Config-IniFiles
  - python-dogpile-cache
  - python-isodate
  - python-netaddr
  - python-netifaces
  - python-setuptools_scm
  - python-testtools
  - python-websockify
  - python2-cliff
  - python2-debtcollector
  - python2-deprecation
  - python2-fixtures
  - python2-funcsigs
  - python2-ipaddress
  - python2-pbr
  - python2-positional
  - python2-pysocks
  - python2-pyyaml
  - python2-requests-oauthlib
  - python2-requestsexceptions
  - python2-rfc3986
  - python2-six
  - python2-stevedore
  - sdparm
  - sshpass
  - swig
  - 'turbovnc-2.2.6*'
---
# Yum repo definition files for this target.
yum_repo_files:
  - TurboVNC.repo
  - cm.repo

# Packages for this target; the list ends with the cuda-dcgm and cuda-driver
# packages. Entries carrying a version suffix are pinned.
pkg_list:
  - Lmod-7.8.11
  - atftp-server
  - cluster-tools-dell
  - cluster-tools-slave
  - cm-boost
  - cm-config-ceph-release-luminous
  - cm-config-cm
  - cm-config-dhclient
  - cm-config-dracut-slave
  - cm-config-grub
  - cm-config-ldap-client
  - cm-config-limits
  - cm-config-man
  - cm-config-named
  - cm-config-network-slave
  - cm-config-nfsclient
  - cm-config-rootfiles-slave
  - cm-config-selinux
  - cm-config-ssh-slave
  - cm-config-sysctl-slave
  - cm-config-syslog-slave
  - cm-config-systemd
  - cm-config-xntp-slave
  - cm-config-yum
  - cm-curl
  - cm-dhcp
  - cm-freeipmi
  - cm-ipmitool
  - cm-ipxe-slave
  - cm-libpam
  - cm-libprometheus
  - cm-lua
  - cm-mariadb-libs
  - cm-openssl
  - cm-python2
  - cm-python36
  - cm-slave
  - cm-uge-client
  - cmburn
  - cmburn-slave
  - cmdaemon
  - cmdaemon-remotecm
  - confuse
  - gcc-recent
  - gdb-recent
  - lshw
  - lua-bit32
  - lua-filesystem
  - lua-json
  - lua-lpeg
  - lua-posix
  - lua-term
  - 'mysql++'
  - net-snmp-recent
  - node-installer-slave
  - openvpn
  - perl-Config-IniFiles
  - python-dogpile-cache
  - python-isodate
  - python-netaddr
  - python-netifaces
  - python-setuptools_scm
  - python-testtools
  - python-websockify
  - python2-cliff
  - python2-debtcollector
  - python2-deprecation
  - python2-fixtures
  - python2-funcsigs
  - python2-ipaddress
  - python2-pbr
  - python2-positional
  - python2-pysocks
  - python2-pyyaml
  - python2-requests-oauthlib
  - python2-requestsexceptions
  - python2-rfc3986
  - python2-six
  - python2-stevedore
  - sdparm
  - sshpass
  - swig
  - 'turbovnc-2.2.6*'
  - cuda-dcgm
  - cuda-dcgm-libs
  - cuda-dcgm-nvvs
  - cuda-driver
---
# Yum repo definition files for this target.
yum_repo_files:
  - cm.repo

# Packages for this target (no version pins).
pkg_list:
  - autofs
  - Lmod
  - tmux
  - vim
---
# cheaha.node related
hostname_lookup_table:
  - "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02"
  - "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
  - "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
domain_search_list:
  - cm.cluster
  - rc.uab.edu
  - ib.cluster
  - drac.cluster
  - eth.cluster
  - ib-hdr.cluster
nameserver_list:
  - 172.20.0.25

# LDAP: the certificate path is derived from bright_openldap_path.
bright_openldap_path: "/cm/local/apps/openldap"
ldap_cert_path: "{{bright_openldap_path}}/etc/certs"
ldap_uri: "ldaps://ldapserver"

# proxy_config
target_groups:
  - name: "gpfs5"
    host: "login002"
    default: false
    authorized_keys: "/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys"
    private_key: "/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"
  - name: "gpfs4"
    host: "login001"
    default: true
    authorized_keys: "/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys"
    private_key: "/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"
--- ---
- name: Run a play
ansible.builtin.import_playbook: node.yml
- name: Setup node for use as a virtual compute node - name: Setup node for use as a virtual compute node
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'compute_packages', tags: 'compute_packages' } - { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' } - { name: 'install_packages', tags: 'install_packages' }
- { name: 'install_nhc', tags: 'install_nhc'}
---
# First entry: pulls in the plays defined in node-compute.yml.
- name: Run a play
  ansible.builtin.import_playbook: node-compute.yml

# Second entry: applies the nvidia_driver role to the "default" host group.
- name: Setup node for use as a cluster host with gpu drivers/pkgs
  hosts: default
  become: true
  roles:
    - name: nvidia_driver
      tags: nvidia_driver
---
# Single play: applies the cheaha.node and lmod_user roles to the "default"
# host group with privilege escalation.
- name: Setup node for use as a cluster host
  hosts: default
  become: true
  roles:
    - name: cheaha.node
      tags: cheaha.node
    - name: lmod_user
      tags: lmod_user
---
# Single play: applies the repo-fix, package-install and zsh-install roles to
# the "default" host group with privilege escalation.
- name: Setup node for use as a virtual ood node
  hosts: default
  become: true
  roles:
    - name: fix_centos_repo
      tags: fix_centos_repo
    - name: install_packages
      tags: install_packages
    - name: install_zsh
      tags: install_zsh
--- ---
# tasks file for cheaha.node
- name: Update /etc/hosts with cluster addressing - name: Update /etc/hosts with cluster addressing
ansible.builtin.lineinfile: ansible.builtin.lineinfile:
path: /etc/hosts path: /etc/hosts
line: "{{ item }}" line: "{{ item }}"
loop: loop:
- "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02" "{{ hostname_lookup_table }}"
- "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
- "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
- name: Add proper DNS search to lookup other nodes on the cluster - name: Add proper DNS search to lookup other nodes on the cluster
ansible.builtin.lineinfile: ansible.builtin.lineinfile:
path: /etc/dhcp/dhclient.conf path: /etc/dhcp/dhclient.conf
insertbefore: BOF insertbefore: BOF
line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";' line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";'
- name: Install prerequisite packages create: true
yum:
name: epel-release
state: present state: present
- name: Disable SELinux
ansible.posix.selinux: - name: Template resolv.conf
state: disabled ansible.builtin.template:
- name: Copy cm.repo into place (consider making this a template) src: resolv.conf.j2
ansible.builtin.copy: dest: /etc/resolv.conf
src: cm.repo
dest: /etc/yum.repos.d/cm.repo
owner: root owner: root
group: root group: root
mode: 0644 mode: 0644
backup: true
- name: Disable SELinux
ansible.posix.selinux:
state: disabled
- name: Copy CM repo GPG key - name: Copy CM repo GPG key
ansible.builtin.copy: ansible.builtin.copy:
src: RPM-GPG-KEY-cm src: RPM-GPG-KEY-cm
...@@ -34,118 +34,18 @@ ...@@ -34,118 +34,18 @@
owner: root owner: root
group: root group: root
mode: 0644 mode: 0644
- name: Create slurm group when: "'cm.repo' in yum_repo_files"
ansible.builtin.group:
name: slurm
state: present
gid: 450
- name: Create slurm user
ansible.builtin.user:
name: slurm
state: present
uid: 450
group: slurm
- name: Install required packages
yum:
name:
- slurm-client-18.08.9
- munge-0.5.13
- openldap-servers-2.4.48
- Lmod-7.7.14
- cm-modules-init-client-8.2
- cmdaemon
- nss-pam-ldapd
- ruby
- python3
state: present
- name: Update nsswitch.conf to look for ldap
ansible.builtin.replace:
dest: /etc/nsswitch.conf
regexp: '^({{ item }}:(?!.*\bldap\b).*)$'
replace: '\1 ldap'
loop:
- passwd
- shadow
- group
- netgroup
- automount
- name: Create base directories
ansible.builtin.file:
path: "{{ item.dir }}"
state: directory
mode: "{{ item.mode }}"
loop:
- { dir: /local, mode: '0777' }
- { dir: /scratch, mode: '0755' }
- { dir: /share, mode: '0755' }
- { dir: /data/rc/apps, mode: '0755' } # this is only required for the symlink to be happy
- { dir: /data/user, mode: '0755' }
- { dir: /data/project, mode: '0755' }
- name: Set up NFS GPFS mount point(s)
ansible.posix.mount:
path: "{{ item.path }}"
src: "{{ item.src }}"
fstype: "{{ item.fstype }}"
opts: "{{ item.opts }}"
state: present
loop:
- { path: /cm/shared, src: "gpfs.rc.uab.edu:/data/cm/shared-8.2", fstype: nfs, opts: "_netdev,defaults" }
- { path: /data/project, src: "gpfs.rc.uab.edu:/data/project", fstype: nfs, opts: "_netdev,defaults" }
- { path: /data/user, src: "gpfs.rc.uab.edu:/data/user", fstype: nfs, opts: "_netdev,local_lock=posix,defaults" }
- { path: /home, src: "/data/user/home", fstype: none, opts: bind }
- { path: /data/rc/apps, src: "gpfs.rc.uab.edu:/data/rc/apps", fstype: nfs, opts: "_netdev,defaults" }
- name: Create symbolic links
ansible.builtin.file:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
owner: root
group: root
force: yes
state: link
loop:
- { src: /local, dest: /scratch/local }
- { src: /data/rc/apps, dest: /share/apps }
- name: Add ssh key for root access - name: Add ssh key for root access
ansible.posix.authorized_key: ansible.posix.authorized_key:
user: root user: root
state: present state: present
key: "{{ root_ssh_key }}" key: "{{ root_ssh_key }}"
- name: Copy munge key
ansible.builtin.copy:
src: munge.key
dest: /etc/munge/munge.key
owner: daemon
group: root
mode: 0400
- name: Copy ldap cert(s) into place
ansible.builtin.copy:
src: "{{ item.src }}"
dest: "/cm/local/apps/openldap/etc/certs/{{ item.src }}"
owner: ldap
group: ldap
mode: 0440
loop:
- { src: ca.pem }
- { src: ldap.key }
- { src: ldap.pem }
- name: Copy ldap config into place
ansible.builtin.copy:
src: nslcd.conf
dest: /etc/nslcd.conf
owner: root
group: root
mode: 0600
- name: Enable services
ansible.builtin.service:
name: "{{ item }}"
enabled: yes
loop:
- munge
- slurmd
- nslcd
- name: Set timezone to America/Chicago - name: Set timezone to America/Chicago
community.general.timezone: community.general.timezone:
name: America/Chicago name: America/Chicago
retries: 3
- name: Install zsh delay: 3
import_tasks: zsh.yml register: result
until: not result.failed