数据处理与数据集制备
目录
一、图片批量重命名
文件夹结构:
- source
  - animals
    - cat
    - cattle
    - dog
    - horse
    - pig
  - fruits
    - apple
    - banana
    - durian
    - grape
    - orange
  - vehicles
    - bus
    - car
    - plane
    - ship
    - train
- animals
- 实现代码:
import os
import uuid
import cv2 as cv

sourcePath = '../../../DataSet/source/'
animalPath = sourcePath + 'animals/'
fruitPath = sourcePath + 'fruits/'
vehiclePath = sourcePath + 'vehicles/'


def rename_images(category_path):
    """Rename the files of every class folder to ``<class>_<n>.jpg``.

    ``category_path`` is a directory path ending in ``/`` whose
    sub-directories are class folders (e.g. ``fruits/apple/``).  Inside each
    class folder the files are numbered from 1 in sorted name order.

    The ``.jpg`` suffix is applied to every file regardless of its original
    extension; only the name changes, the file content is not converted.

    The rename is done in two passes through unique temporary names.  Renaming
    directly could target a name that is still held by a file that has not
    been processed yet (typically when the script is run a second time), which
    overwrites that file on POSIX and raises on Windows.
    """
    # os.listdir() returns entries in arbitrary order; sort for stable numbering.
    for folder_name in sorted(os.listdir(category_path)):
        subfolder = category_path + folder_name + '/'
        if not os.path.isdir(subfolder):
            continue  # stray file next to the class folders

        file_names = sorted(
            name for name in os.listdir(subfolder)
            if os.path.isfile(subfolder + name)
        )

        # Pass 1: move every file out of the way to a name nothing else uses.
        token = uuid.uuid4().hex
        staged = []
        for count, file_name in enumerate(file_names, start=1):
            old_name = subfolder + file_name
            temp_name = subfolder + ".renaming_%s_%d" % (token, count)
            new_name = subfolder + "%s_%d.jpg" % (folder_name, count)
            os.rename(old_name, temp_name)
            staged.append((old_name, temp_name, new_name))

        # Pass 2: all final names are free now, so no file can be clobbered.
        for old_name, temp_name, new_name in staged:
            print(old_name, "====>", new_name)
            os.rename(temp_name, new_name)


# Rename fruit, animal and vehicle pictures, in that order.
for category_path in (fruitPath, animalPath, vehiclePath):
    rename_images(category_path)
实现效果:
二、数据增强+高斯加噪
比赛时采用摄像头拍摄图像进行识别,拍摄得到的图像包含很多噪点,而原数据集是无噪声的原图。例如一张狗的照片在拍摄时受室内光线影响,出现很多红色噪点,就容易被误识别为苹果。去年出现过这种情况,因此今年针对这一问题,对数据集进行加噪处理。
- 实现代码:
# Image noising.
# Pictures taken with a real camera contain a lot of noise and differ a lot
# from the clean source images, so noise is added to the source images.
import numpy as np
import cv2 as cv
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img


def Gasuss_Noise(image, mean=0, var=0.001):
    """Add Gaussian noise to an image file.

    image: path of the image to read
    mean : mean of the noise
    var  : variance of the noise; larger values give stronger noise

    Returns ``[noise, out]`` where ``noise`` is the noise array scaled by 255
    and ``out`` is the noisy image as a ``uint8`` array.

    Raises ``ValueError`` when the file cannot be read as an image.
    """
    pixels = cv.imread(image)
    if pixels is None:
        # cv.imread signals an unreadable file by returning None.
        raise ValueError("cannot read image: %s" % image)

    # Normalise to 0..1 so that `var` is independent of the 0..255 scale.
    pixels = np.array(pixels / 255, dtype=float)
    noise = np.random.normal(mean, var ** 0.5, pixels.shape)

    # The image lives in 0..1, so the result is clipped to 0..1 as well.
    # Clipping at -1 would leave negative values, which cannot be represented
    # in uint8 and corrupt dark pixels when cast below.
    out = np.clip(pixels + noise, 0.0, 1.0)
    out = np.uint8(out * 255)  # back to the 0..255 range

    noise = noise * 255
    return [noise, out]


def Date_Enhancement(img_input_path, img_output_path, save_prefix="image"):
    """Write one randomly augmented copy of an image into a directory.

    img_input_path : path of the source image
    img_output_path: directory the augmented JPEG is saved to (created if
                     missing)
    save_prefix    : file-name prefix handed to Keras for the saved file

    The image is darkened to 60 % brightness, then rotated / shifted / sheared
    / brightness-adjusted by ``ImageDataGenerator``.
    """
    image = load_img(img_input_path)
    darkened = image.point(lambda p: p * 0.6)
    # The generator expects a batch, so add a leading batch axis.
    batch = np.expand_dims(img_to_array(darkened), axis=0)

    os.makedirs(img_output_path, exist_ok=True)

    # Rotation, width/height shift, shear and brightness jitter; horizontal
    # flip is off and uncovered areas are filled with the constant 40.
    img_dag = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.001,   # horizontal offset
        height_shift_range=0.001,  # vertical offset
        shear_range=0.02,
        # zoom_range=[0.6, 0.9],
        brightness_range=[0.9, 1.1],
        horizontal_flip=False,
        fill_mode="constant",
        cval=40,
    )
    img_generator = img_dag.flow(
        batch,
        batch_size=1,
        save_to_dir=img_output_path,
        save_prefix=save_prefix,
        save_format="jpg",
    )
    # Drawing one batch is what makes the generator save one file.
    next(img_generator)


####################################################################
# Source folders
sourcePath = '../../../DataSet/source/'
animalPath = sourcePath + 'animals/'
fruitPath = sourcePath + 'fruits/'
vehiclePath = sourcePath + 'vehicles/'
# Intermediate folders: pictures after step one (augmentation)
middlePath = '../../../DataSet/middle/'
animalMiddlePath = middlePath + 'animals/'
fruitMiddlePath = middlePath + 'fruits/'
vehicleMiddlePath = middlePath + 'vehicles/'
# Target folders: pictures after step two (noise)
processPath = '../../../DataSet/process/'
animalProcessPath = processPath + 'animals/'
fruitProcessPath = processPath + 'fruits/'
vehicleProcessPath = processPath + 'vehicles/'


def _process_category(source_dir, middle_dir, process_dir, copies=10, var=0.002):
    """Augment and then add noise to every class folder of one category."""
    for folder_name in sorted(os.listdir(source_dir)):
        sub_folder = source_dir + folder_name + '/'
        if not os.path.isdir(sub_folder):
            continue
        middle_save_path = middle_dir + folder_name + '/'
        final_save_path = process_dir + folder_name + '/'
        # cv.imwrite does not create missing folders, so create them up front.
        os.makedirs(middle_save_path, exist_ok=True)
        os.makedirs(final_save_path, exist_ok=True)

        # Data augmentation: `copies` random variants per source picture.
        for raw_pic in sorted(os.listdir(sub_folder)):
            raw_pic_path = sub_folder + raw_pic
            stem = os.path.splitext(raw_pic)[0]
            for i in range(copies):
                # NOTE(review): Keras appears to add only a short random
                # suffix to the prefix, so a shared prefix risks two copies
                # getting the same file name - confirm against the Keras docs.
                Date_Enhancement(raw_pic_path, middle_save_path,
                                 save_prefix="%s_%d" % (stem, i))
        print(folder_name, "Enhance Done")

        # Gaussian noise on every augmented picture.
        rename_count = 1
        for middle_pic in sorted(os.listdir(middle_save_path)):
            middle_save_name = middle_save_path + middle_pic
            final_save_name = final_save_path + "%s_%d.jpg" % (folder_name, rename_count)
            try:
                noise, out = Gasuss_Noise(middle_save_name, mean=0, var=var)
            except ValueError as err:
                print("skipped:", err)
                continue
            if not cv.imwrite(final_save_name, out):
                raise IOError("cannot write image: %s" % final_save_name)
            rename_count = rename_count + 1
        print(folder_name, "Process Done")


# Process fruits, animals and vehicles, in that order.
for _source_dir, _middle_dir, _process_dir in (
    (fruitPath, fruitMiddlePath, fruitProcessPath),
    (animalPath, animalMiddlePath, animalProcessPath),
    (vehiclePath, vehicleMiddlePath, vehicleProcessPath),
):
    _process_category(_source_dir, _middle_dir, _process_dir)
三、数据集制备
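【说明】`os.listdir()` 返回文件/文件夹的顺序并不保证按首字母排序(取决于操作系统和文件系统),因此需要先对结果排序,才能得到稳定的标签顺序。按名称排序后,标签的顺序为:0–4 为 cat、cattle、dog、horse、pig;5–9 为 apple、banana、durian、grape、orange;10–14 为 bus、car、plane、ship、train。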
- 实现代码:
import os
import cv2 as cv
import numpy as np

make = True    # build pic.npy / label.npy from the processed pictures
check = True   # load the saved arrays and preview them one by one
# Root folder of the processed pictures
picPath = '../../../DataSet/process/'

if __name__ == "__main__":
    if make:
        all_data = []
        all_label = []
        i = -1
        # os.listdir() returns entries in arbitrary order.  The label index is
        # the running number of the class folder, so both levels are sorted to
        # keep the indices in line with the `label` list used in the check.
        for fruit_animal_vehicle in sorted(os.listdir(picPath)):
            group_dir = picPath + fruit_animal_vehicle + '/'
            if not os.path.isdir(group_dir):
                continue
            for apple_cat_bus in sorted(os.listdir(group_dir)):
                class_dir = group_dir + apple_cat_bus + '/'
                if not os.path.isdir(class_dir):
                    continue  # a stray file must not consume a label index
                i = i + 1
                for pic in sorted(os.listdir(class_dir)):
                    if os.path.splitext(pic)[-1] != '.jpg':
                        continue
                    img = cv.imread(class_dir + pic)
                    if img is None:
                        continue  # unreadable file, skip it
                    try:
                        # cv.imread yields BGR; reverse the channels to RGB.
                        img = cv.resize(img, (32, 32))[..., (2, 1, 0)]
                    except cv.error:
                        continue
                    all_data.append(img)
                    all_label.append(i)

        all_data = np.asarray(all_data)
        all_label = np.asarray(all_label)

        os.makedirs("../../../DataSet/dataSet", exist_ok=True)
        np.save("../../../DataSet/dataSet/pic", all_data)
        np.save("../../../DataSet/dataSet/label", all_label)

    if check:
        x = np.load("../../../DataSet/dataSet/pic.npy")
        y = np.load("../../../DataSet/dataSet/label.npy")

        label = ["cat", "cattle", "dog", "horse", "pig"] \
                + ["apple", "banana", "durian", "grape", "orange"] \
                + ["bus", "car", "plane", "ship", "train"]

        for count, (d, idx) in enumerate(zip(x, y)):
            print("Class %s %d" % (label[idx], count))
            # Enlarge for viewing and go back to BGR, which cv.imshow expects.
            d = cv.resize(d, (395, 395))[..., (2, 1, 0)]
            cv.imshow("img", d)
            # Any key shows the next picture; Esc or q stops the preview.
            if cv.waitKey(0) & 0xFF in (27, ord('q')):
                break
        cv.destroyAllWindows()
评论(0)
您还未登录,请登录后发表或查看评论