数据集文件处理-重命名文件
- (1)很多数据集的图片取自视频流,文件名很不规律,例如 `62fd8bf4d53a1b94fbac16738406f10b.jpg 1`、`0bdec5cccbcade6b6e94087cb5509d98.jpg 1`(文件名后面是对应的标签)。为了方便管理与处理,需要对几万张图片重命名。图片全部保存在同一个文件夹里,而对应的 label 标签数据全部保存在一个 txt 文本文件里(如图)。注意:下面的代码用英文逗号 `,` 拆分标签文件的每一行,即假定每行格式为“文件名,标签”。
- (2)为此,需要先按文件名(不含扩展名)把图片与 label 标签数据匹配起来,然后统一按序号重命名图片,并把“新文件名 标签”逐行写入一个新的 txt 标注文件。最终效果如下图。
接下来直接上代码:
# -*- coding:utf-8 -*-
import os
class ImageRename():
    """Rename labelled JPEG images to sequential names and rewrite the labels.

    Each line of the label file is expected to look like ``<file name>,<label>``.
    For every line whose file-name stem matches an image in the image
    directory, the image is renamed to ``0000NNN<ext>`` (``NNN`` is a running
    counter, zero-padded to at least three digits) and the line
    ``<new name> <label>`` is appended to the output label file.
    """

    # Defaults preserved from the original script so ImageRename() keeps working.
    DEFAULT_IMAGE_DIR = 'F:\\18\\eclipse-workspace\\img\\检测\\images'
    DEFAULT_LABEL_FILE = 'F:\\18\\eclipse-workspace\\img\\检测\\trainf.txt'
    DEFAULT_OUTPUT_FILE = 'F:\\18\\eclipse-workspace\\img\\检测\\train_f.txt'

    # Source extension -> extension given to the renamed file. Only these two
    # (case-sensitive) extensions are processed, exactly as before.
    _TARGET_EXTENSIONS = {'.JPG': '.jpeg', '.jpg': '.jpg'}

    def __init__(self, path=None, label_file=None, output_file=None):
        """Store the locations used by :meth:`rename`.

        Args:
            path: Directory holding the images. Defaults to the original
                hard-coded image directory.
            label_file: Existing label file to read (``name,label`` per line).
            output_file: Label file to append the renamed entries to.
        """
        self.path = self.DEFAULT_IMAGE_DIR if path is None else path
        self.label_file = (
            self.DEFAULT_LABEL_FILE if label_file is None else label_file)
        self.output_file = (
            self.DEFAULT_OUTPUT_FILE if output_file is None else output_file)

    def rename(self):
        """Rename every labelled image and append its new label line.

        Returns:
            The number of images that were renamed.

        Raises:
            FileExistsError: If a destination name is already taken by another
                file; raised explicitly so nothing is overwritten on platforms
                where ``os.rename`` would silently replace the target.
            OSError: If the directory or one of the label files cannot be
                accessed.
        """
        image_dir = os.path.abspath(self.path)
        # Sorted so the numbering is reproducible when several files share a stem.
        entries = sorted(os.listdir(image_dir))
        total_num = len(entries)

        # Index the candidate images by stem once, instead of rescanning the
        # whole directory listing for every label line.
        pending = {}
        for entry in entries:
            stem, ext = os.path.splitext(entry)
            if ext not in self._TARGET_EXTENSIONS:
                continue
            if not os.path.isfile(os.path.join(image_dir, entry)):
                continue
            pending.setdefault(stem, []).append(entry)

        renamed = 0
        with open(self.label_file, 'r') as labels, \
                open(self.output_file, 'a') as out:
            for line in labels:
                fields = line.strip().split(',')
                if len(fields) < 2:
                    # Blank or malformed line: there is no label to carry over.
                    continue
                stem = os.path.splitext(fields[0])[0]
                # pop() ensures a file is renamed only once even when the
                # label file lists the same name on several lines.
                for entry in pending.pop(stem, ()):
                    new_ext = self._TARGET_EXTENSIONS[os.path.splitext(entry)[1]]
                    new_name = '0000' + format(renamed, '03d') + new_ext
                    src = os.path.join(image_dir, entry)
                    dst = os.path.join(image_dir, new_name)
                    if dst != src and os.path.exists(dst):
                        raise FileExistsError(
                            'refusing to overwrite existing file: %s' % dst)
                    os.rename(src, dst)
                    # Record exactly the name the file now has on disk, so
                    # the label entry and the image can never disagree.
                    out.write(new_name + ' ' + fields[1] + '\n')
                    renamed += 1

        print('total %d to rename & converted %d jpgs' % (total_num, renamed))
        return renamed
# Script entry point: build the renamer with its default paths and run it once.
if __name__ == '__main__':
    ImageRename().rename()