From 399a65e7790f49ec29411686bc8ac3a9a8635ff0 Mon Sep 17 00:00:00 2001
From: Zach Nussbaum
Date: Wed, 5 Apr 2023 02:53:04 +0000
Subject: [PATCH] feat: multinode setup

---
Note: the index blob ids and the commit id in the From line were not
regenerated for the revised script contents; regenerate them with
git format-patch if they matter.

 create_hostname.sh   | 17 +++++++++++++++++
 head_node_setup.sh   | 41 +++++++++++++++++++++++++++++++++++++++++
 worker_node_setup.sh | 16 ++++++++++++++++
 3 files changed, 74 insertions(+)
 create mode 100644 create_hostname.sh
 create mode 100644 head_node_setup.sh
 create mode 100644 worker_node_setup.sh

diff --git a/create_hostname.sh b/create_hostname.sh
new file mode 100644
index 00000000..8a9187f2
--- /dev/null
+++ b/create_hostname.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Write the two-node hostfile /job/hostfile: one entry for localhost and one
+# for the worker, each with N_GPUS slots, then print the hostfile path.
+#
+# Usage: create_hostname.sh WORKER_IP
+set -euo pipefail
+
+export WORKER_IP="${1:?usage: create_hostname.sh WORKER_IP}"
+readonly N_GPUS=8
+
+# create dir if it does not exist
+sudo mkdir -p /job
+# Data goes through %s so it is never parsed as a printf format string.
+printf 'localhost slots=%s\n%s slots=%s\n' "$N_GPUS" "$WORKER_IP" "$N_GPUS" \
+  | sudo tee /job/hostfile
+echo /job/hostfile
diff --git a/head_node_setup.sh b/head_node_setup.sh
new file mode 100644
index 00000000..7a813d1e
--- /dev/null
+++ b/head_node_setup.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+#
+# Head-node setup for a two-node run: export ./data_multiplus to the worker
+# over NFS, install pdsh, append this account RSA public key to its own
+# authorized_keys, and write /job/hostfile.
+#
+# Usage: head_node_setup.sh WORKER_IP N_GPUS
+# Run from the directory that should hold data_multiplus; the export path is
+# built from $PWD.
+set -eu
+
+USAGE="usage: head_node_setup.sh WORKER_IP N_GPUS"
+WORKER_IP="${1:?$USAGE}"
+N_GPUS="${2:?$USAGE}"
+
+# apt-get rather than apt: apt warns that its CLI is not stable for scripts.
+sudo apt-get install -y nfs-kernel-server
+sudo mkdir -p ./data_multiplus
+sudo chmod 777 ./data_multiplus
+# Each exports entry must sit on its own line, so end it with a newline, and
+# skip the append when an identical entry is already present from a rerun.
+EXPORT_ENTRY="${PWD}/data_multiplus ${WORKER_IP}(rw,sync,no_subtree_check)"
+if ! grep -qxF -- "$EXPORT_ENTRY" /etc/exports 2>/dev/null; then
+  printf '%s\n' "$EXPORT_ENTRY" | sudo tee -a /etc/exports
+fi
+sudo systemctl restart nfs-kernel-server
+
+sudo apt-get install -y pdsh
+# NOTE(review): these exports only reach processes started by this script
+# unless the script is sourced - confirm the launcher actually sees them.
+export DSHPATH="$PATH"
+export PDSH_RCMD_TYPE=ssh
+
+# Pass -f so ssh-keygen never prompts for a path, and keep an existing key
+# instead of being asked whether to overwrite it.
+[ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
+cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
+
+sudo mkdir -p /job
+printf 'localhost slots=%s\n%s slots=%s\n' "$N_GPUS" "$WORKER_IP" "$N_GPUS" \
+  | sudo tee /job/hostfile
diff --git a/worker_node_setup.sh b/worker_node_setup.sh
new file mode 100644
index 00000000..2c50e29d
--- /dev/null
+++ b/worker_node_setup.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+#
+# Worker-node setup: install the NFS client and mount the data_multiplus
+# directory exported by the head node onto ./data_multiplus.
+#
+# Usage: worker_node_setup.sh HEAD_IP
+# The remote path is built from the local $PWD, so run this from the same
+# absolute directory that the head-node script was run from.
+set -eu
+
+HEAD_IP="${1:?usage: worker_node_setup.sh HEAD_IP}"
+
+sudo apt-get install -y nfs-common
+sudo mkdir -p ./data_multiplus
+# Quoted so a working directory containing spaces stays a single argument.
+sudo mount "${HEAD_IP}:${PWD}/data_multiplus" ./data_multiplus