When preparing a video dataset for deep learning, videos that are too short are dirty data and must be removed. Doing so requires walking through nested, multi-level folders in batch, finding the videos that are too short, and deleting those files automatically. The implementation code is as follows:
import os
import cv2
# Root directory of the files to process; every nested folder below it is
# visited.
rawpath = '/root/autodl-tmp/webvid/video10_20%'
# Minimum number of frames that must be readable for a file to be kept.
video_len = 64


def _has_enough_frames(path, min_frames):
    """Return True if at least ``min_frames`` frames can be read from ``path``.

    Frames are read one by one and reading stops as soon as the threshold is
    reached or a read fails. A file from which no frame can be read at all is
    therefore reported as too short.
    """
    cap = cv2.VideoCapture(path)
    try:
        for _ in range(min_frames):
            ret, _img = cap.read()
            if not ret:
                return False
        return True
    finally:
        # Release the capture on every path so handles do not pile up over a
        # large dataset, and so the file is closed before it may be deleted.
        cap.release()


# Walk all nested folders and delete every file that is too short.
for root, _dirs, files in os.walk(rawpath):
    for f in files:
        # pt: path of a single file
        pt = os.path.join(root, f)
        if _has_enough_frames(pt, video_len):
            continue
        # Log line format kept from the original shell-based version.
        print('rm -rf ' + pt)
        try:
            # Delete through the OS API instead of a shell command: file names
            # containing spaces or shell metacharacters would otherwise make
            # `rm -rf` act on the wrong paths.
            os.remove(pt)
        except OSError as err:
            # Best effort, like `rm -f`: report the failure and keep going.
            print('failed to remove ' + pt + ': ' + str(err))