- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我正在尝试加载我自己的数据集,我使用自定义 Dataloader
读取图像和标签并将它们转换为 PyTorch 张量。然而,当 Dataloader
被实例化时,它返回字符串 x "image"
和 y "labels"
但不是实际值或读取时的张量 (iter
)
print(self.train_loader) # shows a Tensor object
tic = time.time()
with tqdm(total=self.num_train) as pbar:
for i, (x, y) in enumerate(self.train_loader): # x and y are returned as string (where it fails)
if self.use_gpu:
x, y = x.cuda(), y.cuda()
x, y = Variable(x), Variable(y)
这是 dataloader.py
的样子:
from __future__ import print_function, division #ds
import numpy as np
from utils import plot_images
import os #ds
import pandas as pd #ds
from skimage import io, transform #ds
import torch
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader #ds
from torchvision import transforms
from torchvision import utils #ds
from torch.utils.data.sampler import SubsetRandomSampler
class CDataset(Dataset):
def __init__(self, csv_file, root_dir, transform=None):
"""
Args:
csv_file (string): Path to the csv file with annotations.
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.frame = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.frame)
def __getitem__(self, idx):
img_name = os.path.join(self.root_dir,
self.frame.iloc[idx, 0]+'.jpg')
image = io.imread(img_name)
# image = image.transpose((2, 0, 1))
labels = np.array(self.frame.iloc[idx, 1])#.as_matrix() #ds
#landmarks = landmarks.astype('float').reshape(-1, 2)
#print(image.shape)
#print(img_name,labels)
sample = {'image': image, 'labels': labels}
if self.transform:
sample = self.transform(sample)
return sample
class ToTensor(object):
"""Convert ndarrays in sample to Tensors."""
def __call__(self, sample):
image, labels = sample['image'], sample['labels']
#print(image)
#print(labels)
# swap color axis because
# numpy image: H x W x C
# torch image: C X H X W
image = image.transpose((2, 0, 1))
#print(image.shape)
#print((torch.from_numpy(image)))
#print((torch.from_numpy(labels)))
return {'image': torch.from_numpy(image),
'labels': torch.from_numpy(labels)}
def get_train_valid_loader(data_dir,
batch_size,
random_seed,
#valid_size=0.1, #ds
#shuffle=True,
show_sample=False,
num_workers=4,
pin_memory=False):
"""
Utility function for loading and returning train and valid
multi-process iterators over the MNIST dataset. A sample
9x9 grid of the images can be optionally displayed.
If using CUDA, num_workers should be set to 1 and pin_memory to True.
Args
----
- data_dir: path directory to the dataset.
- batch_size: how many samples per batch to load.
- random_seed: fix seed for reproducibility.
- #ds valid_size: percentage split of the training set used for
the validation set. Should be a float in the range [0, 1].
In the paper, this number is set to 0.1.
- shuffle: whether to shuffle the train/validation indices.
- show_sample: plot 9x9 sample grid of the dataset.
- num_workers: number of subprocesses to use when loading the dataset.
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
True if using GPU.
Returns
-------
- train_loader: training set iterator.
- valid_loader: validation set iterator.
"""
#ds
#error_msg = "[!] valid_size should be in the range [0, 1]."
#assert ((valid_size >= 0) and (valid_size <= 1)), error_msg
#ds
# define transforms
#normalize = transforms.Normalize((0.1307,), (0.3081,))
trans = transforms.Compose([
ToTensor(), #normalize,
])
# load train dataset
#train_dataset = datasets.MNIST(
# data_dir, train=True, download=True, transform=trans
#)
train_dataset = CDataset(csv_file='/home/Desktop/6June17/util/train.csv',
root_dir='/home/caffe/data/images/',transform=trans)
# load validation dataset
#valid_dataset = datasets.MNIST( #ds
# data_dir, train=True, download=True, transform=trans #ds
#)
valid_dataset = CDataset(csv_file='/home/Desktop/6June17/util/eval.csv',
root_dir='/home/caffe/data/images/',transform=trans)
num_train = len(train_dataset)
train_indices = list(range(num_train))
#ds split = int(np.floor(valid_size * num_train))
num_valid = len(valid_dataset) #ds
valid_indices = list(range(num_valid)) #ds
#if shuffle:
# np.random.seed(random_seed)
# np.random.shuffle(indices)
#ds train_idx, valid_idx = indices[split:], indices[:split]
train_idx = train_indices #ds
valid_idx = valid_indices #ds
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=batch_size, sampler=train_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
print(train_loader)
valid_loader = torch.utils.data.DataLoader(
valid_dataset, batch_size=batch_size, sampler=valid_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
# visualize some images
if show_sample:
sample_loader = torch.utils.data.DataLoader(
dataset, batch_size=9, #shuffle=shuffle,
num_workers=num_workers, pin_memory=pin_memory
)
data_iter = iter(sample_loader)
images, labels = data_iter.next()
X = images.numpy()
X = np.transpose(X, [0, 2, 3, 1])
plot_images(X, labels)
return (train_loader, valid_loader)
def get_test_loader(data_dir,
batch_size,
num_workers=4,
pin_memory=False):
"""
Utility function for loading and returning a multi-process
test iterator over the MNIST dataset.
If using CUDA, num_workers should be set to 1 and pin_memory to True.
Args
----
- data_dir: path directory to the dataset.
- batch_size: how many samples per batch to load.
- num_workers: number of subprocesses to use when loading the dataset.
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
True if using GPU.
Returns
-------
- data_loader: test set iterator.
"""
# define transforms
#normalize = transforms.Normalize((0.1307,), (0.3081,))
trans = transforms.Compose([
ToTensor(), #normalize,
])
# load dataset
#dataset = datasets.MNIST(
# data_dir, train=False, download=True, transform=trans
#)
test_dataset = CDataset(csv_file='/home/Desktop/6June17/util/test.csv',
root_dir='/home/caffe/data/images/',transform=trans)
test_loader = torch.utils.data.DataLoader(
test_dataset, batch_size=batch_size, shuffle=False,
num_workers=num_workers, pin_memory=pin_memory,
)
return test_loader
#for i_batch, sample_batched in enumerate(dataloader):
# print(i_batch, sample_batched['image'].size(),
# sample_batched['landmarks'].size())
# # observe 4th batch and stop.
# if i_batch == 3:
# plt.figure()
# show_landmarks_batch(sample_batched)
# plt.axis('off')
# plt.ioff()
# plt.show()
# break
一个最小的工作示例很难在这里发布,但基本上我正在尝试修改这个项目 http://torch.ch/blog/2015/09/21/rmva.html与 MNIST 一起顺利工作。我只是想用我自己的数据集和上面使用的自定义 dataloader.py
来运行它。
它像这样实例化一个Dataloader
:
在 trainer.py
中:
if config.is_train:
self.train_loader = data_loader[0]
self.valid_loader = data_loader[1]
self.num_train = len(self.train_loader.sampler.indices)
self.num_valid = len(self.valid_loader.sampler.indices)
-> 从 main.py
运行:
if config.is_train:
data_loader = get_train_valid_loader(
config.data_dir, config.batch_size,
config.random_seed, #config.valid_size,
#config.shuffle,
config.show_sample, **kwargs
)
最佳答案
您没有正确使用 python 的 enumerate()
。 (x, y)
当前分配了批处理字典的 2 个键,即字符串 "image"
和 "labels"
。这应该可以解决您的问题:
for i, batch in enumerate(self.train_loader):
x, y = batch["image"], batch["labels"]
# ...
关于image - PyTorch 自定义数据集数据加载器返回字符串(键)而不是张量,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50878650/
如何使用 SPListCollection.Add(String, String, String, String, Int32, String, SPListTemplate.QuickLaunchO
我刚刚开始使用 C++ 并且对 C# 有一些经验,所以我有一些一般的编程经验。然而,似乎我马上就被击落了。我试过在谷歌上寻找,以免浪费任何人的时间,但没有结果。 int main(int argc,
这个问题已经有答案了: In Java 8 how do I transform a Map to another Map using a lambda? (8 个回答) Convert a Map>
我正在使用 node + typescript 和集成的 swagger 进行 API 调用。我 Swagger 提出以下要求 http://localhost:3033/employees/sear
我是 C++ 容器模板的新手。我收集了一些记录。每条记录都有一个唯一的名称,以及一个字段/值对列表。将按名称访问记录。字段/值对的顺序很重要。因此我设计如下: typedef string
我需要这两种方法,但j2me没有,我找到了一个replaceall();但这是 replaceall(string,string,string); 第二个方法是SringBuffer但在j2me中它没
If string is an alias of String in the .net framework为什么会发生这种情况,我应该如何解释它: type JustAString = string
我有两个列表(或字符串):一个大,另一个小。 我想检查较大的(A)是否包含小的(B)。 我的期望如下: 案例 1. B 是 A 的子集 A = [1,2,3] B = [1,2] contains(A
我有一个似乎无法解决的小问题。 这里...我有一个像这样创建的输入... var input = $(''); 如果我这样做......一切都很好 $(this).append(input); 如果我
我有以下代码片段 string[] lines = objects.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.No
这可能真的很简单,但我已经坚持了一段时间了。 我正在尝试输出一个字符串,然后输出一个带有两位小数的 double ,后跟另一个字符串,这是我的代码。 System.out.printf("成本:%.2
以下是 Cloud Firestore 列表查询中的示例之一 citiesRef.where("state", ">=", "CA").where("state", "= 字符串,我们在Stack O
我正在尝试检查一个字符串是否包含在另一个字符串中。后面的代码非常简单。我怎样才能在 jquery 中做到这一点? function deleteRow(locName, locID) { if
这个问题在这里已经有了答案: How to implement big int in C++ (14 个答案) 关闭 9 年前。 我有 2 个字符串,都只包含数字。这些数字大于 uint64_t 的
我有一个带有自定义转换器的 Dozer 映射: com.xyz.Customer com.xyz.CustomerDAO customerName
这个问题在这里已经有了答案: How do I compare strings in Java? (23 个回答) 关闭 6 年前。 我想了解字符串池的工作原理以及一个字符串等于另一个字符串的规则是
我已阅读 this问题和其他一些问题。但它们与我的问题有些无关 对于 UILabel 如果你不指定 ? 或 ! 你会得到这样的错误: @IBOutlet property has non-option
这两种方法中哪一种在理论上更快,为什么? (指向字符串的指针必须是常量。) destination[count] 和 *destination++ 之间的确切区别是什么? destination[co
This question already has answers here: Closed 11 years ago. Possible Duplicates: Is String.Format a
我有一个Stream一个文件的,现在我想将相同的单词组合成 Map这很重要,这个词在 Stream 中出现的频率. 我知道我必须使用 collect(Collectors.groupingBy(..)
我是一名优秀的程序员,十分优秀!