Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • rc/hpc-factory
  • louistw/hpc-factory
  • jpr/hpc-factory
  • krish94/hpc-factory
  • atlurie/hpc-factory
  • dwheel7/hpc-factory
6 results
Show changes
Commits on Source (286)
Showing
with 676 additions and 338 deletions
This diff is collapsed.
[defaults]
# change the default callback, you can only have one 'stdout' type enabled at a time.
#stdout_callback = skippy
stdout_callback = yaml
## Ansible ships with some plugins that require whitelisting,
## this is done to avoid running all of a type by default.
## These setting lists those that you want enabled for your system.
## Custom plugins should not need this unless plugin author specifies it.
# enable callback plugins, they can output to stdout but cannot be 'stdout' type.
callbacks_enabled = timer, debug, profile_roles, profile_tasks, minimal
# Force color
force_color = true
...@@ -3,4 +3,5 @@ ...@@ -3,4 +3,5 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
---
- name: Setup node for use as a virtual cheaha node
hosts: default
become: true
roles:
- { name: 'cheaha.node', tags: 'cheaha.node' }
- { name: 'nfs_mounts', tags: 'nfs_mounts' }
- { name: 'ldap_config', tags: 'ldap_config' }
- { name: 'slurm_client', tags: 'slurm_client' }
---
- name: Setup node for use as a virtual cheaha node
hosts: all
become: true
roles:
- { name: 'cheaha.node', tags: 'cheaha.node' }
- { name: 'nfs_mounts', tags: 'nfs_mounts', when: enable_nfs_mounts }
- { name: 'ldap_config', tags: 'ldap_config' }
- { name: 'slurm_client', tags: 'slurm_client', when: enable_slurm_client }
- { name: 'ssh_host_keys', tags: 'ssh_host_keys' }
- { name: 'ssh_proxy_config', tags: 'ssh_proxy_config', when: enable_ssh_proxy_config }
- { name: 'ssl_cert', tags: 'ssl_cert', when: enable_ssl_certs }
- { name: 'rsyslog_config', tags: 'rsyslog_config', when: enable_rsyslog_config }
- { name: 'rewrite_map', tags: 'rewrite_map', when: enable_rewrite_map }
- { name: 'fail2ban', tags: 'fail2ban', when: enable_fail2ban }
- { name: 'install_node_exporter', tags: 'install_node_exporter', when: enable_node_exporter }
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
- { name: 'cuda_driver', tags: 'cuda_driver' } - { name: 'cuda_driver', tags: 'cuda_driver' }
- { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' } - { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' }
......
...@@ -4,9 +4,91 @@ ...@@ -4,9 +4,91 @@
yum_repo_files: [] yum_repo_files: []
pkg_list: [] pkg_list: []
slurm_version: 18.08.9 slurm_version: 18.08.9
enable_slurm_client: false
# NHC related # NHC related
nhc_download_url: "https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3-1.el7.noarch.rpm" nhc_download_url: "https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3-1.el7.noarch.rpm"
nhc_download_path: "/tmp" nhc_download_path: "/tmp"
nhc_git_repo: "https://gitlab.rc.uab.edu/rc/nhc.git" nhc_git_repo: "https://gitlab.rc.uab.edu/rc/nhc.git"
nhc_git_repo_path: "/tmp/nhc" nhc_git_repo_path: "/tmp/nhc"
root_ssh_key: ""
# cheaha.node related
hostname_lookup_table:
- "10.141.255.254 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
domain_search_list:
- openstack.internal
- cm.cluster
nameserver_list:
- 10.141.255.254
# ldap_config related
ldap_cert_path: "/etc/openldap/certs"
ldap_uri: "ldap://ldapserver"
# nfs_mounts related
enable_nfs_mounts: true
use_autofs: false
use_fstab: false
mount_points:
- { "src": "master:/gpfs4", "path": "/gpfs4", "opts": "ro,sync,hard", "mode": "0755" }
- { "src": "master:/gpfs5", "path": "/gpfs5", "opts": "ro,sync,hard", "mode": "0755" }
autofs_mounts:
- { "src": "master:/gpfs4/&", "path": "/gpfs4", "opts": "fstype=nfs,vers=3,_netdev,default", "mode": '0755', "mount_point": "/gpfs4", "map_name": "gpfs4", key: "*" }
- { "src": "master:/gpfs5/&", "path": "/gpfs5", "opts": "fstype=nfs,vers=3,_netdev,default", "mode": '0755', "mount_point": "/gpfs5", "map_name": "gpfs5", key: "*" }
#SSH Host Keys
S3_ENDPOINT: ""
SSH_HOST_KEYS_S3_BUCKET: ""
SSH_HOST_KEYS_S3_OBJECT: ""
# AWS credentials
LTS_ACCESS_KEY: ""
LTS_SECRET_KEY: ""
# ssh proxy
enable_ssh_proxy_config: false
sshpiper_dest_dir: "/opt/sshpiper"
# rsyslog
enable_rsyslog_config: true
rsyslog_target: "*.* @master:514"
# ssl certs
enable_ssl_certs: false
ssl_cert_s3_bucket: ""
ssl_cert_key_location: "/etc/pki/tls/private"
ssl_cert_file_location: "/etc/pki/tls/certs"
ssl_cert_key: ""
ssl_cert_file: ""
ssl_cert_chain_file: ""
ssl_apache_config: ""
apache_service: "httpd"
# rewrite map
enable_rewrite_map: false
target_groups:
- {"name": "gpfs4", "host": "login001", "default": True }
- {"name": "gpfs5", "host": "login002", "default": False }
# account app
account_app_port: 8000
# fail2ban
enable_fail2ban: false
maxretry: 1
findtime: 600
bantime: 1200
fail2ban_white_list: "127.0.0.1/8"
# Node Exporter
enable_node_exporter: false
node_exporter_ver: "1.8.2"
node_exporter_filename: "node_exporter-{{ node_exporter_ver }}.linux-amd64"
node_exporter_user: node_exporter
node_exporter_group: node_exporter
node_exporter_port: 9100
# CentOS Repo
centos_base_url: "http://vault.centos.org"
---
# cheaha.node related
hostname_lookup_table:
- "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02"
- "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
- "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
domain_search_list:
- cm.cluster
- rc.uab.edu
- ib.cluster
- drac.cluster
- eth.cluster
- ib-hdr.cluster
nameserver_list:
- 172.20.0.25
bright_openldap_path: "/cm/local/apps/openldap"
ldap_cert_path: "{{bright_openldap_path}}/etc/certs"
ldap_uri: "ldaps://ldapserver"
# proxy_config
target_groups:
- {"name": "gpfs5", "host": "login002", "default": False, "authorized_keys":"/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys", "private_key":"/gpfs5/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"}
- {"name": "gpfs4", "host": "login001", "default": True, "authorized_keys":"/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/authorized_keys", "private_key":"/gpfs4/data/user/home/$DOWNSTREAM_USER/.ssh/id_ecdsa"}
...@@ -3,9 +3,7 @@ ...@@ -3,9 +3,7 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
- { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' }
- { name: 'install_nhc', tags: 'install_nhc'} - { name: 'install_nhc', tags: 'install_nhc'}
- name: Setup node for use as a virtual cheaha node
ansible.builtin.import_playbook: cheaha.yml
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
hosts: default hosts: default
become: true become: true
roles: roles:
- { name: 'fix_centos_repo', tags: 'fix_centos_repo' }
- { name: 'install_packages', tags: 'install_packages' } - { name: 'install_packages', tags: 'install_packages' }
- { name: 'install_zsh', tags: 'install_zsh' } - { name: 'install_zsh', tags: 'install_zsh' }
- name: Setup node for use as a virtual cheaha node
ansible.builtin.import_playbook: cheaha.yml
...@@ -4,15 +4,24 @@ ...@@ -4,15 +4,24 @@
path: /etc/hosts path: /etc/hosts
line: "{{ item }}" line: "{{ item }}"
loop: loop:
- "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02" "{{ hostname_lookup_table }}"
- "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
- "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
- name: Add proper DNS search to lookup other nodes on the cluster - name: Add proper DNS search to lookup other nodes on the cluster
ansible.builtin.lineinfile: ansible.builtin.lineinfile:
path: /etc/dhcp/dhclient.conf path: /etc/dhcp/dhclient.conf
insertbefore: BOF insertbefore: BOF
line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";' line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";'
create: true
state: present
- name: Template resolv.conf
ansible.builtin.template:
src: resolv.conf.j2
dest: /etc/resolv.conf
owner: root
group: root
mode: 0644
backup: true
- name: Disable SELinux - name: Disable SELinux
ansible.posix.selinux: ansible.posix.selinux:
...@@ -25,6 +34,7 @@ ...@@ -25,6 +34,7 @@
owner: root owner: root
group: root group: root
mode: 0644 mode: 0644
when: "'cm.repo' in yum_repo_files"
- name: Add ssh key for root access - name: Add ssh key for root access
ansible.posix.authorized_key: ansible.posix.authorized_key:
...@@ -35,3 +45,7 @@ ...@@ -35,3 +45,7 @@
- name: Set timezone to America/Chicago - name: Set timezone to America/Chicago
community.general.timezone: community.general.timezone:
name: America/Chicago name: America/Chicago
retries: 3
delay: 3
register: result
until: not result.failed
search {{ domain_search_list | join(' ') }}
{% for name_server in nameserver_list %}
nameserver {{ name_server }}
{% endfor %}
---
- name: Install fail2ban
ansible.builtin.package:
name: "{{ item }}"
state: present
loop:
- fail2ban
- fail2ban-firewalld
- name: Configure fail2ban
ansible.builtin.template:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
backup: true
loop:
- { src: 'jail.local.j2', dest: '/etc/fail2ban/jail.local' }
- { src: 'sshpiperd_filter.local.j2', dest: '/etc/fail2ban/filter.d/sshpiperd.local' }
- { src: 'sshpiperd_jail.local.j2', dest: '/etc/fail2ban/jail.d/sshpiperd.local' }
- name: Activate the firewalld support for fail2ban
ansible.builtin.command:
cmd: mv /etc/fail2ban/jail.d/00-firewalld.conf /etc/fail2ban/jail.d/00-firewalld.local
- name: Configure firewalld to allow ssh and sshpiper traffic
ansible.posix.firewalld:
port: "{{ item }}"
zone: public
state: enabled
permanent: true
loop:
- 2222/tcp
- 22/tcp
- name: Enable and start firewalld
ansible.builtin.service:
name: firewalld
enabled: true
state: restarted
- name: Enable and start fail2ban
ansible.builtin.service:
name: fail2ban
enabled: true
state: restarted
[DEFAULT]
banaction = firewalld
bantime = {{ bantime }}
ignoreip = {{ fail2ban_white_list }}
[sshd]
enabled = true
# Refer to https://github.com/fail2ban/fail2ban/wiki/Developing-Regex-in-Fail2ban for developing regex using fail2ban
#
[INCLUDES]
before = common.conf
[DEFAULT]
_daemon = sshpiperd
__iso_datetime = "\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:[+-]\d{2}:\d{2}|Z)"
__pref = time=%(__iso_datetime)s level=(?:debug|error)
[Definition]
# Define the prefix regex for the log lines
prefregex = ^<F-MLFID>%(__prefix_line)s%(__pref)s</F-MLFID>\s+<F-CONTENT>.+</F-CONTENT>$
# Failregex to match the specific failure log lines (prefregex is automatically included)
failregex = ^msg="connection from .*failtoban: ip <HOST> too auth many failures"$
ignoreregex =
mode = normal
maxlines = 1
# This configuration will block the remote host after {{maxretry}} failed SSH login attempts.
[sshpiperd]
enabled = true
filter = sshpiperd
logpath = /var/log/messages
port = 22
maxretry = {{ maxretry }}
backend = auto
findtime = {{ findtime }}
---
- name: Get CentOS repo files
shell: ls /etc/yum.repos.d/CentOS-*
register: repo_files
- name: Remove mirrorlist from CentOS repo files
ansible.builtin.replace:
path: "{{ item }}"
regexp: '^mirrorlist'
replace: '#mirrorlist'
backup: yes
with_items: "{{ repo_files.stdout_lines }}"
- name: Use vault baseurl to CentOS repo files
ansible.builtin.replace:
path: "{{ item }}"
regexp: '^#baseurl=http://mirror.centos.org'
replace: 'baseurl={{ centos_base_url }}'
backup: yes
with_items: "{{ repo_files.stdout_lines }}"
---
- name: Download node_exporter binary
ansible.builtin.get_url:
url: "https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_ver }}/{{ node_exporter_filename }}.tar.gz"
dest: "/tmp/{{ node_exporter_filename }}.tar.gz"
- name: Extract node_exporter
ansible.builtin.unarchive:
src: "/tmp/{{ node_exporter_filename }}.tar.gz"
dest: "/tmp"
remote_src: yes
- name: Create system group for user account {{ node_exporter_group }}
ansible.builtin.group:
name: "{{ node_exporter_group }}"
system: true
state: present
- name: Create system user account {{ node_exporter_user }}
ansible.builtin.user:
name: "{{ node_exporter_user }}"
comment: Prometheus node_exporter system account
group: "{{ node_exporter_group }}"
system: true
home: /var/lib/node_exporter
create_home: false
shell: /sbin/nologin
state: present
- name: Copy node_exporter binary
ansible.builtin.copy:
src: "/tmp/{{ node_exporter_filename }}/node_exporter"
dest: /usr/local/bin/node_exporter
remote_src: yes
owner: root
group: root
mode: 0755
- name: Copy systemd unit file
ansible.builtin.template:
src: node_exporter.service.j2
dest: /etc/systemd/system/node_exporter.service
owner: root
group: root
mode: '0644'
- name: Clean up /tmp
ansible.builtin.file:
path: "/tmp/{{ item }}"
state: absent
loop:
- "{{ node_exporter_filename }}.tar.gz"
- "{{ node_exporter_filename }}"
- name: Restart node_exporter service
ansible.builtin.systemd:
daemon_reload: yes
name: node_exporter
state: restarted
enabled: true
- name: Collect facts about system services
ansible.builtin.service_facts:
- name: Configure firewalld to allow prometheus
ansible.posix.firewalld:
port: "{{ node_exporter_port }}/tcp"
zone: public
state: enabled
permanent: true
when:
- "'firewalld.service' in ansible_facts.services"
- ansible_facts.services["firewalld.service"].state == "running"
- name: Enable and start firewalld
ansible.builtin.service:
name: firewalld
enabled: true
state: restarted
when:
- "'firewalld.service' in ansible_facts.services"
- ansible_facts.services["firewalld.service"].state == "running"
[Unit]
Description=Node Exporter
After=network.target
[Service]
User={{ node_exporter_user }}
Group={{ node_exporter_group }}
Type=simple
ExecStart=/usr/local/bin/node_exporter --web.listen-address=:{{ node_exporter_port }} --collector.filesystem.mount-points-exclude "^/(dev|proc|run/user/.+|run/credentials/.+|sys|var/lib/docker/.+)($|/)" --collector.filesystem.fs-types-exclude "^(autofs|binfmt_misc|bpf|cgroup|tmpfs|sunrpc|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
[Install]
WantedBy=multi-user.target
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
- nss-pam-ldapd - nss-pam-ldapd
- openldap - openldap
- openldap-clients - openldap-clients
- openldap-servers
- sssd-ldap - sssd-ldap
- name: Update nsswitch.conf to look for ldap - name: Update nsswitch.conf to look for ldap
...@@ -25,7 +24,7 @@ ...@@ -25,7 +24,7 @@
- name: Copy ldap cert(s) into place - name: Copy ldap cert(s) into place
ansible.builtin.copy: ansible.builtin.copy:
src: "{{ item.src }}" src: "{{ item.src }}"
dest: "/cm/local/apps/openldap/etc/certs/{{ item.src }}" dest: "{{ ldap_cert_path }}/{{ item.src }}"
owner: ldap owner: ldap
group: ldap group: ldap
mode: 0440 mode: 0440
...@@ -33,10 +32,11 @@ ...@@ -33,10 +32,11 @@
- { src: ca.pem } - { src: ca.pem }
- { src: ldap.key } - { src: ldap.key }
- { src: ldap.pem } - { src: ldap.pem }
when: ldap_uri | regex_search('^ldaps://')
- name: Copy ldap config into place - name: Copy ldap config into place
ansible.builtin.copy: ansible.builtin.template:
src: nslcd.conf src: nslcd.conf.j2
dest: /etc/nslcd.conf dest: /etc/nslcd.conf
owner: root owner: root
group: root group: root
...@@ -46,5 +46,6 @@ ...@@ -46,5 +46,6 @@
ansible.builtin.service: ansible.builtin.service:
name: "{{ item }}" name: "{{ item }}"
enabled: yes enabled: yes
state: restarted
loop: loop:
- nslcd - nslcd