在 SLURM 集群中,我提交了一个会调用 python 脚本的 shell 脚本(这两个脚本都附在下面)。shell 脚本执行到调用 python 脚本的位置后,就什么也没发生了:没有输出,没有错误消息,而 SLURM 作业却一直保持运行状态。
我认为 python 脚本的具体内容与问题无关(但为了完整起见,我还是把它附上了)。出于调试目的,我在脚本的最开头插入了 print("script started"),
想看看脚本是否真的开始运行,但这条信息没有被打印出来。我在输出中看到的最后一行是 moved to directory。
我还试着改为调用一个只包含 print("test") 的 test.py,
它可以正常执行。
python 脚本无法启动的原因可能是什么,我该如何解决?
编辑:按照用户 jakub 的建议,把 print("script started")
改为 print("script started", flush=True)
之后,这条信息被成功打印出来了。加入更多这样的语句后可以看出,脚本其实一直运行正常,只是没有输出任何内容。在不断执行的 for 循环中加入同样的语句,也会使之前所有缺失的 print() 输出都显示出来。
shell 脚本:
#!/bin/bash
# SLURM batch script: activates the "nucl" conda environment and runs the
# splitTMA step of nucleus-pipeline2.py on a GPU node.
#SBATCH --mail-user=lukas.baehler@pathology.unibe.ch
#SBATCH --mail-type=end,fail
#SBATCH --output=out-ncl
#SBATCH --error=err-ncl
#SBATCH --job-name="Mask RCNN nucleus training and detection"
#SBATCH --time=24:00:00
#SBATCH --partition=gpu
#SBATCH --mem-per-cpu=64G
# NOTE(review): --gres requests a gtx1080ti GPU while --constraint requests
# rtx2080 nodes; these look contradictory - confirm which one is intended.
#SBATCH --gres=gpu:gtx1080ti:1
#SBATCH --constraint=rtx2080

# Make `conda activate` usable in this non-interactive shell.
conda init bash
source ~/.bashrc
conda activate nucl

# Stop right away if the working directory is missing; otherwise python would
# be started from the wrong place.
cd MRCNN/samples/nucleus || exit 1
echo "moved to directory"

# -u disables Python's stdout/stderr buffering. Without it, print() output is
# block-buffered when redirected to the --output file, so the job looks silent
# even though the script is running.
# Propagate a failure so SLURM marks the job as failed (and --mail-type=fail
# fires) instead of the trailing echo turning it into exit code 0.
python -u nucleus-pipeline2.py splitTMA || exit $?
echo "Split TMAs"
python 脚本:
print("script started", flush=True)

import argparse
import contextlib
import os
from shutil import copyfile

# Every sub-command the command line accepts. 'train' and 'detect' are only
# validated here; they are not implemented in this part of the script.
COMMANDS = ("train", "detect", "splitTMA", "splitSpot", "structure")


def log(*values, **kwargs):
    """Print like print(), but flush immediately by default.

    When stdout is redirected to a file (as SLURM does with --output) Python
    block-buffers it, so unflushed progress messages may not appear until the
    buffer fills or the process exits.
    """
    kwargs.setdefault("flush", True)
    print(*values, **kwargs)


def build_parser(default_logs_dir):
    """Return the command line parser of the pipeline.

    Args:
        default_logs_dir: value used for --logs when the option is omitted.
    """
    parser = argparse.ArgumentParser(
        description='Mask R-CNN for nuclei counting and segmentation')
    # `choices` rejects unknown commands with a regular usage error.
    parser.add_argument("command", metavar="<command>", choices=COMMANDS,
                        help="'splitTMA', 'splitSpot', 'structure', 'train' or 'detect'")
    parser.add_argument('--dataset', required=False,
                        help='Root directory of the dataset')
    parser.add_argument('--weights', required=False,
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False, default=default_logs_dir,
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--subset', required=False,
                        metavar="Dataset sub-directory",
                        help="Subset of dataset to run prediction on")
    # Own arguments
    parser.add_argument("--input", required=False,
                        help="Optionally specify the input directory. Should only be used with splitTMA, splitSpot and structure.")
    parser.add_argument("--output", required=False,
                        help="Optionally specify the output directory. Should only be used with splitTMA, splitSpot and structure.")
    return parser


def resolve_dir(user_value, default):
    """Return the user supplied directory, or `default` when none was given."""
    return user_value if user_value else default


def tile_grid(image_shape, tile_resolution):
    """Return (rows, cols): how many tiles fit along axis 0 and axis 1.

    Raises:
        ValueError: if the image size is not an integer multiple of the tile
            size in both dimensions.
    """
    rows, extra_rows = divmod(image_shape[0], tile_resolution[0])
    cols, extra_cols = divmod(image_shape[1], tile_resolution[1])
    if extra_rows or extra_cols:
        raise ValueError("Image resolution is not an integer multiple of the tile resolution.")
    return rows, cols


def iter_tiles(img, tile_resolution):
    """Yield (x, y, tile) for every tile of `img`.

    `x` counts tiles along axis 1 and `y` along axis 0. `x` is the outer
    loop, which matches the [spot_name]-x-y.png naming used when saving.
    Tiles are slices of `img`, not copies.
    """
    tile_h, tile_w = tile_resolution
    rows, cols = tile_grid(img.shape, tile_resolution)
    for x in range(cols):
        for y in range(rows):
            yield x, y, img[y * tile_h:(y + 1) * tile_h,
                            x * tile_w:(x + 1) * tile_w]


def split_tma(input_dir=None, output_dir=None, level=7, new_level=0,
              spot_size=3072, min_area=100, max_area=2000):
    """Split every .mrxs TMA slide in `input_dir` into images of its spots.

    The original script for this is tma-spot.py.

    Args:
        input_dir: folder with the .mrxs files (default "slides").
        output_dir: folder receiving one sub-folder per slide (default "spots").
        level: resolution level used for spot recognition.
        new_level: resolution level used for the saved images
            (0 is the highest resolution).
        spot_size: edge length in pixels of the saved square
            (has to be changed if new_level is changed).
        min_area, max_area: contours whose area at `level` lies outside this
            range are not treated as spots.
    """
    # Heavy third-party imports stay local so that the other commands, and
    # importing this module, do not need them.
    import cv2
    import numpy as np
    from matplotlib import pyplot as plt
    from openslide import open_slide

    log("Using the following parameters:\nlevel = {}\nnewLevel = {}\nSpotSize = {}".format(level, new_level, spot_size))

    input_dir = resolve_dir(input_dir, "slides")
    log("Using '{}' as input folder.".format(input_dir))
    output_dir = resolve_dir(output_dir, "spots")
    log("Using '{}' as output folder.".format(output_dir))

    mrxs_filenames = [name for name in os.listdir(input_dir) if name.endswith(".mrxs")]
    log("\nFound {} MIRAX files.".format(len(mrxs_filenames)))

    half_spot = spot_size // 2
    for filename in mrxs_filenames:
        log("\nReading {}\n".format(filename))
        slide_name = os.path.splitext(filename)[0]
        slide_dir = "{}/{}".format(output_dir, slide_name)

        # closing() releases the slide handle even if a later step fails.
        with contextlib.closing(open_slide("{}/{}".format(input_dir, filename))) as img:
            # Read the whole slide at the recognition level.
            overview = np.array(img.read_region((0, 0), level, img.level_dimensions[level]))
            # Every channel value that is exactly 0 is raised to 255
            # (cv2.add saturates), then the alpha channel is dropped.
            fill = np.zeros_like(overview)
            fill[overview == 0] = 255
            overview_rgb = cv2.cvtColor(cv2.add(fill, overview), cv2.COLOR_RGBA2RGB)

            # Convert to gray levels and apply a gaussian blur.
            # NOTE(review): the array is RGB at this point but is converted
            # with COLOR_BGR2GRAY; kept as in the original so thresholds do
            # not shift - confirm whether RGB2GRAY was intended.
            gray = cv2.cvtColor(overview_rgb, cv2.COLOR_BGR2GRAY)
            gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)

            # Otsu binarization, inverted to get a mask of where tissue is.
            _, thresh = cv2.threshold(gray_blur, 8, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            mask = ~thresh

            # [-2] picks the contour list with both the 2-value and the
            # 3-value return signature of findContours.
            contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

            # Factor from `level` coordinates to the level-0 coordinates
            # used for the read_region() location.
            factor = img.level_downsamples[level]

            spot_nr = 0
            for cnt in contours:
                # Decide based on the contour if it is a spot.
                area = cv2.contourArea(cnt)
                if area < min_area or area > max_area or len(cnt) < 5:
                    continue

                # Center of the spot at the recognition level.
                x, y, w, h = cv2.boundingRect(cnt)
                center_x = x + w // 2
                center_y = y + h // 2

                # Read the spot region around the scaled center.
                spot = img.read_region((int(center_x * factor) - half_spot,
                                        int(center_y * factor) - half_spot),
                                       new_level, (spot_size, spot_size))
                spot = cv2.cvtColor(np.array(spot), cv2.COLOR_RGBA2RGB)

                # Create the directory and save the image.
                os.makedirs(slide_dir, exist_ok=True)
                spot_name = "{0}/{1}/{1}-{2}.png".format(output_dir, slide_name, str(spot_nr).zfill(3))
                plt.imsave(spot_name, spot)
                spot_nr += 1
                log("Spot {} saved - Center X and Y: {}, {}".format(spot_nr, center_x, center_y))


def split_spot(input_dir=None, output_dir=None, tile_resolution=(768, 768)):
    """Split every spot image below `input_dir` into tiles.

    This is copied from spot-annotation.py. `input_dir` (default "spots")
    holds one folder per TMA; tiles are written to
    `output_dir`/<TMA>/<spot>/ (default "tiles").

    Args:
        tile_resolution: tile size along (axis 0, axis 1). The image
            resolution must be an integer multiple of it in both dimensions.

    Raises:
        ValueError: if an image cannot be read or does not divide into tiles.
    """
    import cv2

    input_dir = resolve_dir(input_dir, "spots")
    log("\nUsing '{}' as input folder.".format(input_dir))
    output_dir = resolve_dir(output_dir, "tiles")
    log("Using '{}' as output folder.".format(output_dir))

    # Stray files next to the TMA folders would make os.listdir() fail below.
    tma_folders = [name for name in os.listdir(input_dir)
                   if os.path.isdir(os.path.join(input_dir, name))]
    spot_names = [os.listdir("{}/{}".format(input_dir, name)) for name in tma_folders]
    spot_count = sum(len(names) for names in spot_names)
    log("\nFound {} TMA folders with a total of {} spot images.".format(len(tma_folders), spot_count))

    for a, (tma, spots) in enumerate(zip(tma_folders, spot_names)):
        for b, spot in enumerate(spots):
            log("TMA: {}/{} - Spot: {}/{}".format(a + 1, len(tma_folders), b + 1, len(spots)), end="\r")

            spot_path = "{}/{}/{}".format(input_dir, tma, spot)
            img = cv2.imread(spot_path)
            if img is None:
                # imread() reports unreadable files by returning None.
                raise ValueError("Could not read image '{}'.".format(spot_path))

            # Validate the size before any output folder is created.
            tile_grid(img.shape, tile_resolution)

            stem = os.path.splitext(spot)[0]
            imdir = "{}/{}/{}".format(output_dir, tma, stem)
            os.makedirs(imdir, exist_ok=True)

            # Using plt.imsave() gives alterations in color which is
            # presumably bad. Using cv2.imwrite() is also ca. 10 times faster.
            for x, y, tile in iter_tiles(img, tile_resolution):
                imname = "{}-{}-{}.png".format(stem, str(x).zfill(2), str(y).zfill(2))
                cv2.imwrite("{}/{}".format(imdir, imname), tile)

    log("\nSaved images in {} as [spot_name]-x-y.png.".format(output_dir))


def prepare_structure(input_dir=None, output_dir=None, subset="own_data"):
    """Create the data structure required for the network.

    Adapted from prepare-data-structure.py. Every file found below
    `input_dir` (default "tiles") is copied to
    `output_dir`/`subset`/<file name without extension>/images/<file name>
    (`output_dir` defaults to "data"). Files that already exist at the
    target are left untouched.

    Returns:
        The number of files skipped because they already existed.

    Raises:
        ValueError: if no file is found below `input_dir`.
    """
    input_dir = resolve_dir(input_dir, "tiles")
    log("\nUsing '{}' as input folder.".format(input_dir))
    output_dir = resolve_dir(output_dir, "data")
    log("Using '{}' as output folder.".format(output_dir))

    # (full path, file name, file name without extension) for every file.
    file_names = []
    for path, _, files in os.walk(input_dir):
        for f in files:
            file_names.append(("{}/{}".format(path, f), f, os.path.splitext(f)[0]))

    log("\nFound {} images.".format(len(file_names)))
    if not file_names:
        raise ValueError("No images found in input folder.")

    skip_count = 0
    for i, (source, name, stem) in enumerate(file_names):
        log("Saving {}/{} images.".format(i + 1, len(file_names)), end="\r")
        dirname = "{}/{}/{}/images".format(output_dir, subset, stem)
        target = "{}/{}".format(dirname, name)
        if os.path.exists(target):
            skip_count += 1
            continue
        os.makedirs(dirname, exist_ok=True)
        copyfile(source, target)

    log("\n\nSaved dataset in {}/{}".format(output_dir, subset))
    if skip_count > 0:
        log("Skipped {} files because they already existed.".format(skip_count))
    return skip_count


if __name__ == '__main__':
    # Copied from later in script because the argparse part was moved up and is
    # needed as default in --logs.
    ROOT_DIR = os.path.abspath("../../")
    DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

    # Parse command line arguments
    parser = build_parser(DEFAULT_LOGS_DIR)
    args = parser.parse_args()

    # splitTMA: splits a TMA into images of its spots.
    if args.command == "splitTMA":
        split_tma(args.input, args.output)

    # splitSpot: splits spots into tiles.
    if args.command == "splitSpot":
        split_spot(args.input, args.output)

    # structure: creates the data structure required for the network.
    if args.command == "structure":
        prepare_structure(args.input, args.output)
Python 默认会对 stdin、stdout 和 stderr 进行缓冲。print()
默认写入 stdout,因此它的输出会先停留在缓冲区里,而不是立即显示出来。
来自 https://stackoverflow.com/a/14258511/5666087 :
Python opens the stdin, -out and -error streams in a buffered mode; it'll read or write in larger chunks, keeping data in memory until a threshold is reached.
您可以向 print 传入 flush=True 来强制刷新缓冲区。更多信息请参阅 print 的官方文档。如果您连续有多个 print 语句,只需在最后一个语句中使用 flush=True 即可。
关于python - SLURM批处理脚本不执行Python脚本,不返回错误信息,不停止运行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/63414318/
