我正在使用 CuPy 配合 PyTorch 运行 CUDA 代码。
我的环境是:Ubuntu 20、Anaconda Python 3.7.6、NVIDIA 驱动 440、CUDA 10.2、cupy-cuda102、torch 1.4.0。
首先,我写了一个简单的主程序:
import data_load_test
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
def main():
    """Iterate the test dataset for three epochs, printing once per batch.

    Builds a ``data_load_test.DataLoadTest`` dataset, wraps it in a
    ``DataLoader`` with ``batch_size=1`` and loops over it inside a
    ``torch.cuda.device(0)`` context.
    """
    dataset = data_load_test.DataLoadTest()
    training_loader = DataLoader(dataset, batch_size=1)

    with torch.cuda.device(0):
        for _epoch in range(3):
            # Wrap the loader in a fresh tqdm bar for every epoch: a tqdm
            # instance closes itself once its iterable is exhausted, so a
            # single reused bar stops reporting progress after epoch one.
            for _img in tqdm(training_loader):
                print("see the message")


if __name__ == "__main__":
    main()
以及如下的数据加载器:
from torch.utils.data import Dataset
import cv2
import cupy as cp
def read_cuda_file(cuda_path):
    """Return the entire text content of the file at *cuda_path*.

    Args:
        cuda_path: Path of the CUDA source file to read.

    Returns:
        The file's content as a single string (empty if the file is empty).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when reading raises, and a
    # single read() avoids building the result by repeated concatenation.
    with open(cuda_path, 'r') as f:
        return f.read()
class DataLoadTest(Dataset):
    """Single-item dataset that runs a CuPy raw CUDA kernel on one image.

    On construction the CUDA source in ``cuda/cuda_code.cu`` is compiled with
    ``cp.RawModule``, the kernel named ``myfunc`` is looked up, and ``hi.png``
    is loaded into a CuPy ``uint8`` array. Each ``__getitem__`` call launches
    the kernel and returns the output buffer copied back to a NumPy array.
    """

    def __init__(self):
        """Compile the kernel, load the input image and set up the launch.

        Raises:
            OSError: If ``cv2.imread`` fails to load ``hi.png``.
        """
        # The source text is handed to RawModule as-is; no templating needed.
        cuda_source = read_cuda_file("cuda/cuda_code.cu")
        module = cp.RawModule(code=cuda_source)
        self.myfunc = module.get_function('myfunc')

        # cv2.imread signals failure by returning None rather than raising,
        # so check explicitly instead of failing later with an obscure error.
        image = cv2.imread("hi.png", -1)
        if image is None:
            raise OSError('cv2.imread could not load "hi.png"')
        self.input = cp.asarray(image, cp.uint8)

        # Requires a 3-axis array; only the first two extents are used.
        h, w, _channels = self.input.shape
        self.h = h
        self.w = w
        # Output buffer has the first two input extents swapped: (w, h, 3).
        self.output = cp.zeros((w, h, 3), dtype=cp.uint8)

        self.block_size = (32, 32)
        # NOTE(review): floor division means rows/columns past the last full
        # 32-element tile get no block; confirm the image extents are
        # multiples of 32 or that the kernel is meant to skip the remainder.
        self.grid_size = (h // self.block_size[1], w // self.block_size[0])

    def __len__(self):
        """Return the dataset length, which is always 1."""
        return 1

    def __getitem__(self, idx):
        """Launch the kernel and return the output as a NumPy array.

        Args:
            idx: Sample index; ignored, every index yields the same item.

        Returns:
            A NumPy copy of ``self.output`` taken after the kernel launch.
        """
        # NOTE(review): self.h / self.w are passed as plain Python ints;
        # verify that this matches the parameter types declared by `myfunc`.
        self.myfunc(self.grid_size, self.block_size, (self.input, self.output, self.h, self.w))
        return cp.asnumpy(self.output)
慕田峪7331174
相关分类