gpt4 book ai didi

python - SLURM批处理脚本不执行Python脚本,不返回错误信息,不停止运行

转载 作者:行者123 更新时间:2023-12-04 17:25:50 27 4
gpt4 key购买 nike

在 SLURM 集群中,我正在提交一个调用 python 脚本的 shell 脚本(这两个脚本都可以在下面找到)。当 shell 脚本执行时,它会运行到调用 python 脚本的位置,但随后什么也没发生:没有输出,没有错误消息,并且 SLURM 作业一直保持运行。
我认为 python 脚本的完整内容与问题无关(但为了完整起见,我还是将其包含在内)。出于调试目的,我在脚本最开头插入了一行 print("script started"),想看看它是否运行,但它没有打印出来。我在输出中看到的最后一行是 moved to directory。
我试着在此之前调用一个只包含 print("test") 的 test.py 脚本,它能正常执行。
python 脚本无法启动的原因可能是什么,我该如何解决?
编辑:按照用户 jakub 的建议,将 print("script started") 改为 print("script started", flush=True) 后,该语句成功打印。加入更多这样的语句后发现,脚本实际上运行得非常好,只是没有输出任何内容。在不断执行的 for 循环中加入同样的语句后,之前缺失的所有 print() 输出也都被打印了出来。
那么问题就变成了:为什么这个脚本中的 print() 语句需要加上 flush=True,而其他脚本中却不需要?
Shell 脚本:

#!/bin/bash
# SLURM batch script: activates the "nucl" conda environment and runs the
# splitTMA step of nucleus-pipeline2.py.

# Notification e-mails and the files that receive stdout / stderr.
#SBATCH --mail-user=lukas.baehler@pathology.unibe.ch
#SBATCH --mail-type=end,fail
#SBATCH --output=out-ncl
#SBATCH --error=err-ncl
#SBATCH --job-name="Mask RCNN nucleus training and detection"

# Wall-clock limit, partition and memory per CPU.
#SBATCH --time=24:00:00
#SBATCH --partition=gpu
#SBATCH --mem-per-cpu=64G

# NOTE(review): the gres line requests a gtx1080ti GPU while the constraint
# names rtx2080 -- confirm that both can be satisfied by the same node.
#SBATCH --gres=gpu:gtx1080ti:1
#SBATCH --constraint=rtx2080


# Make the `conda` shell function available, then switch to the environment.
conda init bash
source ~/.bashrc
conda activate nucl

# Abort instead of running the pipeline from the wrong working directory.
cd MRCNN/samples/nucleus || exit 1
echo "moved to directory"

# Propagate a failure of the Python step: without this the job's exit status
# would be that of the final `echo`, so the job would be reported as
# successful and the `fail` mail notification would never be sent.
if ! python nucleus-pipeline2.py splitTMA; then
    echo "splitTMA failed" >&2
    exit 1
fi
echo "Split TMAs"
python 脚本:

print("script started")

import argparse
import os

# Sub-commands understood by this script.
COMMANDS = ("train", "detect", "splitTMA", "splitSpot", "structure")


def build_parser(default_logs_dir):
    """Create the command line parser of the pipeline.

    Args:
        default_logs_dir: Value used for ``--logs`` when the option is omitted.

    Returns:
        An ``argparse.ArgumentParser`` with one positional ``command``
        (restricted to ``COMMANDS``) and the optional ``--dataset``,
        ``--weights``, ``--logs``, ``--subset``, ``--input`` and ``--output``
        arguments.
    """
    parser = argparse.ArgumentParser(
        description='Mask R-CNN for nuclei counting and segmentation')
    # `choices` makes argparse reject unknown commands with a usage error.
    # An `assert` would be stripped when Python runs with -O.
    parser.add_argument("command",
                        metavar="<command>",
                        choices=COMMANDS,
                        help="'splitTMA', 'splitSpot', 'structure', 'train' or 'detect'")
    parser.add_argument('--dataset', required=False,
                        metavar="/path/to/dataset/",
                        help='Root directory of the dataset')
    parser.add_argument('--weights', required=False,
                        metavar="/path/to/weights.h5",
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False,
                        default=default_logs_dir,
                        metavar="/path/to/logs/",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--subset', required=False,
                        metavar="Dataset sub-directory",
                        help="Subset of dataset to run prediction on")

    # Own arguments
    parser.add_argument("--input", required=False,
                        metavar="path/to/input/folder",
                        help="Optionally specify the input directory. Should only be used with splitTMA, splitSpot and structure.")
    parser.add_argument("--output", required=False,
                        metavar="path/to/output/folder",
                        help="Optionally specify the output directory. Should only be used with splitTMA, splitSpot and structure.")
    return parser


if __name__ == '__main__':
    # Copied from later in script because the argparse part was moved up and is
    # needed as default in --logs.
    ROOT_DIR = os.path.abspath("../../")
    DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

    # Parse command line arguments
    parser = build_parser(DEFAULT_LOGS_DIR)
    args = parser.parse_args()




################################################################################
# splitTMA
################################################################################


# The original script for this is tma-spot.py
# Splits a TMA into images of its spots: every .mrxs slide in the input folder
# is thresholded at a low-resolution level, contours of plausible size are
# treated as spots, and a SpotSize x SpotSize region around each spot centre
# is saved as a PNG in <output>/<slide name>/.
if args.command == "splitTMA":

    import os
    import sys

    import cv2
    import numpy as np
    from openslide import open_slide
    from matplotlib import pyplot as plt

    ###################
    # CONFIGURATION

    # Defines the level of resolution for spot recognition
    level = 7  # Default 7

    # Defines the level of resolution to use for the new images
    newLevel = 0  # Default 0 (highest resolution)

    # Defines the spot size in pixels (has to be changed if newLevel is changed)
    SpotSize = 3072  # Default 3500

    print("Using the following parameters:\nlevel = {}\nnewLevel = {}\nSpotSize = {}".format(level, newLevel, SpotSize))
    ###################

    NUCLEUS_DIR = os.path.abspath("")
    os.chdir(NUCLEUS_DIR)

    INPUT_DIR = args.input if args.input else "slides"
    print("Using '{}' as input folder.".format(INPUT_DIR))

    OUTPUT_DIR = args.output if args.output else "spots"
    print("Using '{}' as output folder.".format(OUTPUT_DIR))

    mrxs_filenames = [filename for filename in os.listdir(INPUT_DIR) if filename.endswith(".mrxs")]
    print("\nFound {} MIRAX files.".format(len(mrxs_filenames)))

    half_spot = int(SpotSize / 2)

    # Loop through all .mrxs files.
    for filename in mrxs_filenames:

        print("\nReading {}\n".format(filename))

        slide_name = filename[:-5]  # file name without the ".mrxs" suffix
        spot_dir = "{}/{}".format(OUTPUT_DIR, slide_name)
        img = open_slide("{}/{}".format(INPUT_DIR, filename))

        # Read the whole slide at the recognition level as an RGBA array.
        x_img = np.array(img.read_region((0, 0), level, img.level_dimensions[level]))
        # Every channel value that is exactly 0 is raised to 255 before the
        # alpha channel is dropped (presumably to turn empty regions white).
        rgb = np.zeros_like(x_img)
        rgb[x_img == 0] = 255
        rgba_im = cv2.add(rgb, x_img)
        imgLevel = cv2.cvtColor(rgba_im, cv2.COLOR_RGBA2RGB)

        # Converts the image to gray levels and applies a gaussian blur.
        # NOTE(review): imgLevel is RGB at this point, so COLOR_RGB2GRAY looks
        # like the intended conversion; left unchanged because switching it
        # would alter the threshold result -- confirm before changing.
        gray = cv2.cvtColor(imgLevel, cv2.COLOR_BGR2GRAY)
        gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)

        # Use an Otsu binarization to generate a mask for where tissue is.
        _, thresh = cv2.threshold(gray_blur, 8, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        thresh = ~thresh

        # Finds the contours of the mask generated.
        contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Scale factor from the recognition level to level-0 coordinates
        # (read_region expects its location in level-0 coordinates).
        factorOld = img.level_downsamples[level]

        # Loop through all contours
        spot_nr = 0
        for cnt in contours:

            # Decide based on the area and point count of the contour if it is a spot
            area = cv2.contourArea(cnt)
            if area < 100 or area > 2000:
                continue
            if len(cnt) < 5:
                continue

            # Calculate the center of the spot
            x, y, w, h = cv2.boundingRect(cnt)
            centerX = x + int(w / 2)
            centerY = y + int(h / 2)

            # Read the spot region, centred on the contour's bounding box
            spot = img.read_region((int(centerX * factorOld) - half_spot,
                                    int(centerY * factorOld) - half_spot),
                                   newLevel, (SpotSize, SpotSize))
            spot = cv2.cvtColor(np.array(spot), cv2.COLOR_RGBA2RGB)

            # Create directory and save the image
            os.makedirs(spot_dir, exist_ok=True)
            spot_name = "{}/{}-{}.png".format(spot_dir, slide_name, str(spot_nr).zfill(3))
            plt.imsave(spot_name, spot)
            spot_nr += 1

            print("Spot {} saved - Center X and Y: {}, {}".format(spot_nr, centerX, centerY))

        # Release the slide handle before opening the next file.
        img.close()
    sys.exit()



################################################################################
# splitSpot
################################################################################


# This is copied from spot-annotation.py
# Splits spots into tiles: every image found in <input>/<TMA folder>/ is cut
# into tiles of `tile_resolution` and written to
# <output>/<TMA folder>/<spot name>/<spot name>-x-y.png.
if args.command == "splitSpot":

    import os
    import sys

    import cv2

    # VARIABLES

    # Change the resolution of the tiles here. Note the image resolution
    # must be an integer multiple of the tile resolutions (both dimensions).
    tile_resolution = [768, 768]

    NUCLEUS_DIR = os.path.abspath("")
    os.chdir(NUCLEUS_DIR)

    INPUT_DIR = args.input if args.input else "spots"
    print("\nUsing '{}' as input folder.".format(INPUT_DIR))

    OUTPUT_DIR = args.output if args.output else "tiles"
    print("Using '{}' as output folder.".format(OUTPUT_DIR))

    # EXECUTION

    TMA_folders = os.listdir(INPUT_DIR)
    # spot_names[a] holds the file names found in TMA_folders[a].
    spot_names = [os.listdir("{}/{}".format(INPUT_DIR, name)) for name in TMA_folders]
    spot_count = sum(len(names) for names in spot_names)
    print("\nFound {} TMA folders with a total of {} spot images.".format(len(TMA_folders), spot_count))

    # tile_resolution[0] is applied to image axis 0, tile_resolution[1] to axis 1.
    tile_h, tile_w = tile_resolution

    for a, TMA in enumerate(TMA_folders):
        for b, spot in enumerate(spot_names[a]):

            print("TMA: {}/{} - Spot: {}/{}".format(a+1, len(TMA_folders), b+1, len(spot_names[a])), end="\r")

            # Read the image; cv2.imread signals failure by returning None.
            spot_path = "{}/{}/{}".format(INPUT_DIR, TMA, spot)
            img = cv2.imread(spot_path)
            if img is None:
                raise OSError("Could not read image '{}'.".format(spot_path))

            # The image must divide evenly into tiles along both axes.
            if img.shape[0] % tile_h or img.shape[1] % tile_w:
                raise ValueError("Image resolution is not an integer multiple of the tile resolution.")
            n_rows = img.shape[0] // tile_h
            n_cols = img.shape[1] // tile_w

            spot_stem = os.path.splitext(spot)[0]
            imdir = "{}/{}/{}".format(OUTPUT_DIR, TMA, spot_stem)
            os.makedirs(imdir, exist_ok=True)

            # Save all the tiles. In the file name x counts tiles along image
            # axis 1 and y along image axis 0. Tiles are sliced straight from
            # the image, so no intermediate array of all tiles is built.
            # cv2.imwrite is used here; the original author noted that
            # plt.imsave() altered colors and was ca. 10 times slower.
            for x in range(n_cols):
                for y in range(n_rows):
                    tile = img[y*tile_h:(y+1)*tile_h, x*tile_w:(x+1)*tile_w]
                    imname = "{}-{}-{}.png".format(spot_stem, str(x).zfill(2), str(y).zfill(2))
                    cv2.imwrite("{}/{}".format(imdir, imname), tile.copy())

    print("\nSaved images in {} as [spot_name]-x-y.png.".format(OUTPUT_DIR))
    sys.exit()



################################################################################
# Prepare Data Structure
################################################################################


# Adapted from prepare-data-structure.py
# Creates the data structure required for the network: every file found below
# the input folder is copied to <output>/own_data/<file stem>/images/<file>.
if args.command == "structure":

    import os
    import sys
    from shutil import copyfile

    NUCLEUS_DIR = os.path.abspath("")
    os.chdir(NUCLEUS_DIR)

    # Setting input and output directories
    INPUT_DIR = args.input if args.input else "tiles"
    print("\nUsing '{}' as input folder.".format(INPUT_DIR))

    OUTPUT_DIR = args.output if args.output else "data"
    print("Using '{}' as output folder.".format(OUTPUT_DIR))

    # Creates a list with the paths of all tiles. Also stores just the
    # filename itself with and without file extension
    file_names = []
    for path, _, files in os.walk(INPUT_DIR):
        for f in files:
            file_names.append(["{}/{}".format(path, f), f, os.path.splitext(f)[0]])
    print("\nFound {} images.".format(len(file_names)))

    # Explicit check instead of `assert`, which is removed under `python -O`.
    if not file_names:
        sys.exit("No images found in input folder.")

    # The dataset needs to be stored inside another folder (default "own_data")
    subset = "own_data"

    # For each file creates the appropriate sub-folders and copies the file.
    skip_count = 0
    for i, (src_path, file_name, stem) in enumerate(file_names):
        print("Saving {}/{} images.".format(i+1, len(file_names)), end="\r")
        dirname = "{}/{}/{}/images".format(OUTPUT_DIR, subset, stem)
        try:
            os.makedirs(dirname)
        except FileExistsError:
            # Only an existing target directory counts as "already existed";
            # other errors (e.g. missing permissions) are no longer hidden.
            skip_count += 1
            continue
        copyfile(src_path, "{}/{}".format(dirname, file_name))

    print("\n\nSaved dataset in {}/{}".format(OUTPUT_DIR, subset))
    if skip_count > 0:
        print("Skipped {} files because they already existed.".format(skip_count))
    print("")
    sys.exit()

最佳答案

Python 默认会对 stdin、stdout 和 stderr 进行缓冲。print() 默认写入 stdout,因此您会看到这种缓冲行为。
来自 https://stackoverflow.com/a/14258511/5666087 :

Python opens the stdin, -out and -error streams in a buffered mode; it'll read or write in larger chunks, keeping data in memory until a threshold is reached.


您可以通过向 print 传递 flush=True 来强制刷新此缓冲区。更多信息请参阅官方文档(the documentation)。如果您有多个连续的 print 语句,只需在最后一个语句中使用 flush=True。

关于python - SLURM批处理脚本不执行Python脚本,不返回错误信息,不停止运行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/63414318/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com