# (Stray web-page headline, not part of the script: "How to build a website quickly / targeted promotion")
# eg9-train-val-txt.py
#_*_ coding:utf8 _*_
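####################################################################
# Define a class used to batch-process images. It provides:
#
# (1) Convert every image to the jpg format.
# (2) Build an image-classification dataset from several image folders,
#     producing txt files in which every line has the form:
#         image_path label
# (3) Split the txt entries into a training set and a test set at a
#     7:3 ratio, in randomly shuffled order.
####################################################################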
import os
import random
import shutil
import sys

import cv2

# Class definition
class GeneDataset():
    """Build an image-classification file list from a tree of image folders.

    Typical use is ``looksubdir()`` -> ``reformat()`` -> ``split_train_val()``.

    Attributes:
        rootdir: Directory whose sub-directories hold the images.
        subdirs: Paths of the sub-directories found by ``looksubdir``.
        subdirimages: Unused by this class; kept for compatibility.
        numclasses: Number of sub-directories found by ``looksubdir``.
        lines: Dataset entries of the form ``"<image path> <label>\\n"``.
    """

    def __init__(self, rootdir):
        """Remember *rootdir* and start with empty bookkeeping."""
        self.rootdir = rootdir
        self.subdirs = []
        self.subdirimages = []
        self.numclasses = 0
        self.lines = []

    def looksubdir(self):
        """Record every directory below ``rootdir`` and count them.

        Each directory found is appended to ``subdirs`` and increments
        ``numclasses``.
        """
        for root, dirs, _files in os.walk(self.rootdir):
            for d in dirs:
                subdir = os.path.join(root, d)
                self.subdirs.append(subdir)
                print("subdir=", subdir)
                self.numclasses += 1

    def reformat(self):
        """Convert the images to jpg and collect ``"path label"`` entries.

        The label is the index of the directory in ``subdirs``. Files that
        already carry a ``.jpg`` extension are listed as they are. Any other
        file is decoded with OpenCV, written next to the original with a
        ``.jpg`` extension, and the original is removed only after the new
        file was written successfully. Files OpenCV cannot decode are
        skipped and left untouched.

        NOTE(review): ``looksubdir`` also records nested directories and
        ``os.walk`` descends into them here, so a file inside a nested
        folder is listed once under its parent's label and once under its
        own. Confirm the dataset layout is flat (one level of class folders).
        """
        for label, subdir in enumerate(self.subdirs):
            for root, _dirs, files in os.walk(subdir):
                for f in files:
                    srcname = os.path.join(root, f)
                    print("srcname", srcname)
                    # splitext only touches the final extension, so a
                    # directory or file name that happens to contain the
                    # extension text is not rewritten.
                    stem, ext = os.path.splitext(srcname)

                    if ext.lower() == ".jpg":
                        # Already a jpg: re-encoding would only lose quality,
                        # and removing "the original" would delete the image.
                        self.lines.append(srcname + ' ' + str(label) + '\n')
                        continue

                    img = cv2.imread(srcname)
                    if img is None:
                        # cv2.imread signals an unreadable file with None.
                        print("skip (not a readable image):", srcname)
                        continue

                    newname = stem + ".jpg"
                    print("newname=", newname)
                    if not cv2.imwrite(newname, img):
                        # Keep the source when the converted copy is missing.
                        print("skip (could not write):", newname)
                        continue

                    self.lines.append(newname + ' ' + str(label) + '\n')
                    os.remove(srcname)

    def split_train_val(self, trainfile, testfile):
        """Shuffle ``lines`` and write a 70/30 train/test split.

        Args:
            trainfile: Path of the file receiving the first 70% of entries.
            testfile: Path of the file receiving the remaining entries.

        Nothing is written when ``lines`` is empty. ``lines`` is shuffled
        in place.
        """
        if not self.lines:
            return

        random.shuffle(self.lines)
        trainlength = int(0.7 * len(self.lines))
        # The context manager guarantees both files are flushed and closed.
        with open(trainfile, 'w') as ftrainfile, open(testfile, 'w') as ftestfile:
            ftrainfile.writelines(self.lines[:trainlength])
            ftestfile.writelines(self.lines[trainlength:])


# Create and use the class
# myclassdataset = GeneDataset(sys.argv[1])
# Driver: build the dataset list and write the train/test split files.
# The dataset root may be given as the first command-line argument; when it
# is omitted the original hard-coded location is used.
DEFAULT_ROOTDIR = "/home/u/wp_data/learn-sh/新图像/"
myclassdataset = GeneDataset(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_ROOTDIR)
myclassdataset.looksubdir()   # discover the class folders
myclassdataset.reformat()     # convert to jpg and collect "path label" lines
myclassdataset.split_train_val('train.txt', 'test.txt')  # written to the cwd