UnidentifiedImageError：无法识别图像文件

4回答

呼如林

可以试试下面这段代码，检查各个图像文件的格式是否正确：

```python
import os
from PIL import Image
folder_path = 'data\img'
extensions = []
for fldr in os.listdir(folder_path):
    sub_folder_path = os.path.join(folder_path, fldr)
    for filee in os.listdir(sub_folder_path):
        file_path = os.path.join(sub_folder_path, filee)
        print('** Path: {}  **'.format(file_path), end="\r", flush=True)
        im = Image.open(file_path)
        rgb_im = im.convert('RGB')
        if filee.split('.')[1] not in extensions:
            extensions.append(filee.split('.')[1])
```

智慧大石

我不知道这是否仍然相关，但对于将来遇到同样问题的人来说：在这种特定情况下，dog_cat 数据集中有两个损坏的文件：

- 猫/666.jpg
- 狗/11702.jpg

只要删除这两个文件就可以了。

侃侃尔雅

我以前遇到过这个问题。因此，我写了一个 Python 脚本，用来检查训练目录和测试目录中的文件是否都是有效的图像文件。文件扩展名必须是 jpg、png、bmp 或 gif 之一，因此脚本首先检查扩展名是否正确，然后尝试使用 cv2 读取图像。如果读入的不是有效图像，则会引发异常。在这两种情况下，都会打印出有问题的文件名。最后，名为 bad_list 的列表中保存了所有有问题的文件路径。注意：目录必须命名为“test”和“train”。

```python
import os
import cv2
bad_list=[]
dir=r'c:\'PetImages'
subdir_list=os.listdir(dir) # create a list of the sub directories in the directory ie train or test
for d in subdir_list:  # iterate through the sub directories train and test
    dpath=os.path.join (dir, d) # create path to sub directory
    if d in ['test', 'train']:
        class_list=os.listdir(dpath) # list of classes ie dog or cat
       # print (class_list)
        for klass in class_list: # iterate through the two classes
            class_path=os.path.join(dpath, klass) # path to class directory
            #print(class_path)
            file_list=os.listdir(class_path) # create list of files in class directory
            for f in file_list: # iterate through the files
                fpath=os.path.join (class_path,f)
                index=f.rfind('.') # find index of period infilename
                ext=f[index+1:] # get the files extension
                if ext  not in ['jpg', 'png', 'bmp', 'gif']:
                    print(f'file {fpath}  has an invalid extension {ext}')
                    bad_list.append(fpath)
                else:
                    try:
                        img=cv2.imread(fpath)
                        size=img.shape
                    except:
                        print(f'file {fpath} is not a valid image file ')
                        bad_list.append(fpath)

print (bad_list)
```

慕尼黑5688855

我们也可以在每次遇到无法打开的文件时直接将其删除，而不是把它追加到损坏文件列表中……

```python
import os
from PIL import Image
folder_path = r"C:\Users\ImageDatasets"
extensions = []
corupt_img_paths=[]
for fldr in os.listdir(folder_path):
    sub_folder_path = os.path.join(folder_path, fldr)
    for filee in os.listdir(sub_folder_path):
        file_path = os.path.join(sub_folder_path, filee)
        print('** Path: {}  **'.format(file_path), end="\r", flush=True)
        try:
            im = Image.open(file_path)
        except:
            print(file_path)
            os.remove(file_path)
            continue
        else:
            rgb_im = im.convert('RGB')
            if filee.split('.')[1] not in extensions:
                extensions.append(filee.split('.')[1])
```