Praveen C
2010-05-04 09:28:17 UTC
Hello
Please excuse me if you received multiple copies of this email. I was having
trouble with my mailing list management.
I am unable to run jobs through PBS. The job gets stuck in the queue forever. I
have given some info below. Please tell me if I can provide any more info to
debug this problem. I hope somebody can point me in the right direction.
Thanks
praveen
[root@master log]# qmgr -c 'print server'
#
# Create queues and set their attributes.
#
#
# Create and define queue default
#
create queue default
set queue default queue_type = Execution
set queue default enabled = True
set queue default started = True
#
# Set server attributes.
#
set server scheduling = True
set server acl_host_enable = False
set server acl_hosts = master.tifrbng.res.in
set server default_queue = default
set server log_events = 511
set server mail_from = adm
set server query_other_jobs = True
set server scheduler_iteration = 600
set server node_check_rate = 150
set server tcp_timeout = 6
set server next_job_number = 16
Here is the reason for queuing:
[praveen@master alpha_sweep]$ checkjob 15
checking job 15
State: Idle EState: Deferred
Creds: user:praveen group:[DEFAULT] class:default qos:DEFAULT
WallTime: 00:00:00 of 00:30:00
SubmitTime: Tue May 4 09:45:39
(Time Queued Total: 00:13:13 Eligible: 00:00:01)
StartDate: -00:13:11 Tue May 4 09:45:41
Total Tasks: 10
Req[0] TaskCount: 10 Partition: ALL
Network: [NONE] Memory >= 0 Disk >= 0 Swap >= 0
Opsys: [NONE] Arch: [NONE] Features: [nash]
IWD: [NONE] Executable: [NONE]
Bypass: 0 StartCount: 1
PartitionMask: [ALL]
Flags: RESTARTABLE
job is deferred. Reason: RMFailure (job cannot be started - cannot set hostlist)
Holds: Defer (hold reason: RMFailure)
PE: 10.00 StartPriority: 1
cannot select job 15 for partition DEFAULT (job hold active)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.supercluster.org/pipermail/torqueusers/attachments/20100504/07da3219/attachment.html
Please excuse me if you received multiple copies of this email. I was having
trouble with my mailing list management.
I am unable to run jobs through PBS. The job gets stuck in the queue forever. I
have given some info below. Please tell me if I can provide any more info to
debug this problem. I hope somebody can point me in the right direction.
Thanks
praveen
[root@master log]# qmgr -c 'print server'
#
# Create queues and set their attributes.
#
#
# Create and define queue default
#
create queue default
set queue default queue_type = Execution
set queue default enabled = True
set queue default started = True
#
# Set server attributes.
#
set server scheduling = True
set server acl_host_enable = False
set server acl_hosts = master.tifrbng.res.in
set server default_queue = default
set server log_events = 511
set server mail_from = adm
set server query_other_jobs = True
set server scheduler_iteration = 600
set server node_check_rate = 150
set server tcp_timeout = 6
set server next_job_number = 16
Here is the reason for queuing:
[praveen@master alpha_sweep]$ checkjob 15
checking job 15
State: Idle EState: Deferred
Creds: user:praveen group:[DEFAULT] class:default qos:DEFAULT
WallTime: 00:00:00 of 00:30:00
SubmitTime: Tue May 4 09:45:39
(Time Queued Total: 00:13:13 Eligible: 00:00:01)
StartDate: -00:13:11 Tue May 4 09:45:41
Total Tasks: 10
Req[0] TaskCount: 10 Partition: ALL
Network: [NONE] Memory >= 0 Disk >= 0 Swap >= 0
Opsys: [NONE] Arch: [NONE] Features: [nash]
IWD: [NONE] Executable: [NONE]
Bypass: 0 StartCount: 1
PartitionMask: [ALL]
Flags: RESTARTABLE
job is deferred. Reason: RMFailure (job cannot be started - cannot set hostlist)
Holds: Defer (hold reason: RMFailure)
PE: 10.00 StartPriority: 1
cannot select job 15 for partition DEFAULT (job hold active)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.supercluster.org/pipermail/torqueusers/attachments/20100504/07da3219/attachment.html