I. Server 端
1. download pkg
website: http://www.adaptivecomputing.com/support/download-center/torque-download/
wget 'http://www.adaptivecomputing.com/index.php?wpfb_dl=2880'
tar -xvzf 'index.php?wpfb_dl=2880'
2.1 Install
yum -y update
yum -y install libxml2-devel openssl-devel gcc gcc-c++
cd torque-4.2.10
./configure
# 指令會放在:
# /usr/local/bin, /usr/local/lib, /usr/local/sbin, /usr/local/include, and /usr/local/man
make
make install
echo '/usr/local/lib' > /etc/ld.so.conf.d/torque.conf
ldconfig
2.2 path setting
export PATH="$PATH":/usr/local/bin:/usr/local/sbin
echo '/usr/local/lib' > /etc/ld.so.conf.d/torque.conf
ldconfig
3. 把pbs_server 加入 service
[root@SI01 torque-4.2.10]# make packages
(pbs_server可以提供qsub平台)
cp contrib/init.d/pbs_server /etc/init.d/
cp contrib/init.d/pbs_mom /etc/init.d/
(pbs_sched可以提供queue)
cp contrib/init.d/pbs_sched /etc/init.d/
(client 透過trqauthd跟pbs_server溝通)
cp contrib/init.d/trqauthd /etc/init.d/
[root@PC01 torque-4.2.10]# /etc/init.d/trqauthd start
[root@PC01 torque-4.2.10]# chkconfig --add pbs_sched
[root@PC01 torque-4.2.10]# chkconfig --add pbs_server
[root@PC01 torque-4.2.10]# chkconfig --add pbs_mom
[root@PC01 torque-4.2.10]# service pbs_server status
pbs_server is stopped
3. pbs設定:開機啟動pbs設定
[root@PC01 torque-4.2.10]# chkconfig pbs_server on
[root@PC01 torque-4.2.10]# chkconfig pbs_sched on
如果head node要加入運算:
[root@PC01 torque-4.2.10]# chkconfig pbs_mom on
4. 初始化資料庫
[root@pc01 torque-4.2.10]# ./torque.setup root
查看server 設定
[root@pc01 torque-4.2.10]# qmgr -c 'print server'
5. 編輯node 資源
vi /var/spool/torque/server_priv/nodes
PC1 np=2
PC2 np=2
[root@pc01 pbs]# service pbs_sched restart
[root@pc01 pbs]# service pbs_server restart
[root@pc01 pbs]# pbsnodes
查看server 狀態active
[root@pc01 torque-4.2.10]# qmgr -c 'list server'
Server pc01
server_state = Active
查看scheduler 狀態 work
[root@pc01 torque-4.2.10]# qmgr -c 'list queue batch'
Queue batch
queue_type = Execution
6. 設定pbs_mom config (把下列指令寫成test.sh; 最後要sh test.sh)
#!/bin/sh
# Append the pbs_mom settings to the MOM configuration file.
# Every entry is single-quoted so that $pbsserver, $logevent and $usecp are
# written literally instead of being expanded as shell variables.
mom_file='/var/spool/torque/mom_priv/config'
printf '%s\n' \
  '$pbsserver pc01' \
  '$logevent 225' \
  '$usecp *:/home /home' \
  '$usecp *:/work1 /work1' \
  '$usecp *:/work2 /work2' \
  '$usecp *:/work3 /work3' >>"$mom_file"
II. computing nodes setting (在server端用 script 設定computing node端)
1. 原本在server端 torque package 放在opt下面; I.3 已經 make packages 過了, 所以下面兩個file 已存在
#!/bin/sh
#1. copy mom script to computing node
# Torque source tree on the server; referenced again later in this file when
# the pbs_mom init script is copied to the nodes.
path='/opt/torque-4.2.10/'
File1='torque-package-clients-linux-x86_64.sh' # for submitting jobs on a computing node
File2='torque-package-mom-linux-x86_64.sh' # for running jobs on a computing node
# NOTE(review): the copy step below is commented out and lists different hosts
# (SI02..SI05); the install loop assumes both packages are already present in
# /root/Documents on each node - confirm before running.
#for i in SI02 SI03 SI04 SI05;do scp $path$File1 $path$File2 $i:/root/Documents/. ;done
# Run each self-extracting package on every node: the clients package goes to
# all nodes first, then the mom package.
for pkg in "$File1" "$File2"; do
  for node in pc02 pc03; do
    ssh "$node" "/root/Documents/$pkg" --install
  done
done
2.啟動服務
#2. copy node setting to computing node
# Push the server's pbs_mom configuration file to the same location on every
# computing node.
mom_config='/var/spool/torque/mom_priv/config'
for i in pc02 pc03; do scp "$mom_config" "$i:$mom_config"; done
#3. add pbs_mom to service
# Copy the pbs_mom init script from the Torque source tree to each node.
# $path is the source directory assigned earlier in this script.
pbs_service='contrib/init.d/pbs_mom'
service_path='/etc/init.d/'
for i in pc02 pc03; do scp "$path$pbs_service" "$i:$service_path"; done
# The host and the remote command must be separate arguments to ssh; without
# the space the whole string is taken as the host name and nothing is run.
for i in pc02 pc03; do ssh "$i" 'service pbs_mom start; service pbs_mom status'; done
#4. start pbs_mom when power on
for i in pc02 pc03; do ssh "$i" 'chkconfig --add pbs_mom;chkconfig pbs_mom on'; done
防火牆
netstat -tlnp | grep ssh