gpt4 book ai didi

linux - 安装 Torque 后所有节点的状态都显示为 down

转载 作者:太空宇宙 更新时间:2023-11-04 12:15:26 27 4
gpt4 key购买 nike

全新安装 Torque 后,所有节点的状态都显示为 down(不可用)。我不知道为什么。

[root@rbx-1 6.0.1]# pbsnodes -a
rbx-1
state = down
power_state = Running
np = 1
ntype = cluster
mom_service_port = 15002
mom_manager_port = 15003

rbx-2
state = down
power_state = Running
np = 1
ntype = cluster
mom_service_port = 15002
mom_manager_port = 15003

以下是 qmgr 的输出:

[root@rbx-1 6.0.1]# qmgr -c 'p s'

create queue batch
set queue batch queue_type = Execution
set queue batch resources_default.nodes = 1
set queue batch resources_default.walltime = 01:00:00
set queue batch enabled = True
set queue batch started = True
#
# Set server attributes.
#
set server scheduling = True
set server acl_hosts = rbx-1
set server managers = root@rbx-1
set server operators = root@rbx-1
set server default_queue = batch
set server log_events = 2047
set server mail_from = adm
set server node_check_rate = 150
set server tcp_timeout = 300
set server job_stat_rate = 300
set server poll_jobs = True
set server down_on_error = True
set server mom_job_sync = True
set server keep_completed = 300
set server next_job_number = 0
set server moab_array_compatible = True
set server nppcu = 1
set server timeout_for_job_delete = 120
set server timeout_for_job_requeue = 120

请帮忙 - 我不知道是什么原因造成的,也不知道接下来该尝试什么。任何教程或其他方面的建议都会很有帮助。

最佳答案

尝试运行 momctl -d0 -h rbx-1,查看 MOM 是否正在与服务器通信。确保 server_name 文件中的主机名与服务器和各计算节点上 /etc/hosts 中的条目一致。我猜测节点上的 /etc/hosts 中没有配置短主机名(即不带域名的主机名)。

关于 linux - 安装 Torque 后所有节点的状态都显示为 down,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/47562041/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com