带 batch_size 的迭代器读取文件，解决大数据处理时内存不足的问题。

https://github.com/zhangbo2008/perfect_batch_generator_for_pyton

核心代码如下:

def bylineread(fimename, batchsize=1):
    """Lazily yield lists of up to ``batchsize`` lines from the file *fimename*.

    Reads the file incrementally (one batch held in memory at a time), so
    arbitrarily large files can be processed without exhausting memory.

    Args:
        fimename: path of the text file to read.
        batchsize: maximum number of lines per yielded batch (default 1).

    Yields:
        list[str]: consecutive lines (newline included). The final batch may
        be shorter than ``batchsize``. Fix vs. the original: when the line
        count is an exact multiple of ``batchsize``, no empty ``[]`` batch
        is emitted at the end.
    """
    with open(fimename) as f:
        out = []
        # Iterating the file object is the idiomatic (and buffered)
        # replacement for the manual readline() loop.
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                out = []
        if out:  # emit the trailing partial batch, if any
            yield out

# `read` is a generator object produced by bylineread.
read = bylineread('1', batchsize=2)
while True:
    try:
        print(next(read))
    except StopIteration:
        # Narrowed from a bare `except:` — the bare form also swallowed
        # real errors (e.g. FileNotFoundError for the missing file '1')
        # behind the same 'over' message.
        print('over')
        break
def bylineread(fimename, batchsize=1):
    """Lazily yield lists of up to ``batchsize`` lines from the file *fimename*.

    Reconstructed with valid indentation (the pasted copy had its leading
    whitespace stripped and was not runnable Python).

    Args:
        fimename: path of the text file to read.
        batchsize: maximum number of lines per yielded batch (default 1).

    Yields:
        list[str]: consecutive lines (newline included). The final batch may
        be shorter than ``batchsize``; no empty batch is emitted when the
        line count divides evenly.
    """
    with open(fimename) as f:
        out = []
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                out = []
        if out:  # trailing partial batch
            yield out
   
# `read` is a generator object produced by bylineread.
# (Reconstructed with valid indentation; the pasted copy was not runnable.)
read = bylineread('1', batchsize=2)
while True:
    try:
        print(next(read))
    except StopIteration:
        # Catch only generator exhaustion; a bare `except:` would also
        # hide errors such as FileNotFoundError for the missing file '1'.
        print('over')
        break

猜你喜欢

转载自www.cnblogs.com/zhangbo2008/p/13373587.html