阿里云随笔(6)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u010255642/article/details/82953930

使用机器学习搭建深度学习实验时,通常需要在界面右侧设置读取目录、代码文件等参数。这些参数以“--XXX”(XXX代表参数名)的命令行参数形式传入,tf.flags提供了定义和读取这些参数的功能。
列出oss桶下所有的csv文件:

import tensorflow as tf
import os

FLAGS = tf.flags.FLAGS
# On PAI the bucket root arrives via the --buckets command-line flag.
tf.flags.DEFINE_string('buckets', 'oss://myhaspl-ai.oss-cn-beijing-internal.aliyuncs.com/', '')
# batch size is a number: declare it as an integer flag, not a string,
# so FLAGS.batch_size can be used directly without int() conversion.
tf.flags.DEFINE_integer('batch_size', 15, 'batch大小')
# tf.gfile.Glob executes eagerly (it is not a graph op), so no
# tf.Session is needed to list all csv files under the bucket.
files = tf.gfile.Glob(os.path.join(FLAGS.buckets, '*.csv'))
print(files)

阿里云机器学习PAI读取OSS文件

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 15 10:54:53 2018
@author: myhaspl
@email:[email protected]
阿里云读取文件
csv格式:怀孕次数、葡萄糖、血压、皮肤厚度,胰岛素,bmi,糖尿病血统函数,年龄,结果
"""

import tensorflow as tf
import os

sampleCount=200  # rows per training batch pulled from diabetes.csv
testCount=10  # NOTE(review): appears unused below — getTestData relies on its default n=10; confirm intent


# Build all ops in a dedicated graph; it is executed later via tf.Session(graph=g).
g=tf.Graph()
with g.as_default():
    
       
    def inputFromFile(fileName,skipLines=1):
        """Return a tensor yielding one raw CSV line per evaluation.

        Builds a filename queue for the single input file and wires a
        text-line reader to it, skipping ``skipLines`` header rows.
        """
        # Queue of file names feeding the reader (here just one file).
        nameQueue = tf.train.string_input_producer([fileName])
        # The reader emits (key, value) records; only the line text is needed.
        lineReader = tf.TextLineReader(skip_header_lines=skipLines)
        _, rawLine = lineReader.read(nameQueue)
        return rawLine

    def getTestData(fileName,skipLines=1,n=10):
        """Read a shuffled batch of n test rows and return (features, labels).

        The original body duplicated the whole file-queue / decode /
        shuffle_batch pipeline that inputFromFile + getNextBatch already
        implement; delegating to them keeps a single copy of the
        CSV-parsing logic so the 9-column schema is defined in one place.

        Args:
            fileName: path of the test csv (OSS or local).
            skipLines: header lines to skip before reading records.
            n: number of test rows per batch.
        Returns:
            (testFeatures, testY): features tensor of shape (n, 8) and
            label tensor of shape (n, 1), same as the training pipeline.
        """
        return getNextBatch(n, inputFromFile(fileName, skipLines))
    
    def getNextBatch(n,values):
        """Decode raw CSV lines and return a shuffled (features, labels) batch.

        Args:
            n: batch size.
            values: string tensor of raw CSV lines (9 numeric columns;
                the last column is the outcome label).
        Returns:
            (features, y): features of shape (n, 8), labels of shape (n, 1).
        """
        # One float default per CSV column forces float32 decoding.
        columnDefaults = [[1.] for _ in range(9)]
        columns = tf.decode_csv(values, record_defaults=columnDefaults)
        # shuffle_batch batches all columns together so rows stay aligned.
        batched = tf.train.shuffle_batch(columns, batch_size=n, capacity=1000, min_after_dequeue=1)
        # First eight columns are features; stack then transpose to (n, 8).
        features = tf.transpose(tf.stack(batched[:-1]))
        # Last column is the label, reshaped to a column vector (n, 1).
        y = tf.transpose([batched[-1]])
        return (features, y)
    
    # Training pipeline: raw lines from diabetes.csv, batched into
    # (features, labels) tensors of sampleCount rows per sess.run call.
    with tf.name_scope("inputSample"): 
        samples=inputFromFile("oss://myhaspl-ai.oss-cn-beijing-internal.aliyuncs.com/diabetes.csv",1)
        inputDs=getNextBatch(sampleCount,samples)  


    # Test pipeline: getTestData uses its default batch size n=10
    # (the module-level testCount constant is not passed here).
    with tf.name_scope("testSamples"):  
        testInputDs=getTestData("oss://myhaspl-ai.oss-cn-beijing-internal.aliyuncs.com/diabetes_test.csv")
       
      
 
   
# Run the graph. The queue-runner threads MUST be started before any
# batch op is evaluated, otherwise sess.run below blocks forever waiting
# on empty input queues.
with tf.Session(graph=g) as sess:    

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    

    # Pull one training batch and one test batch out of the pipelines.
    sampleX,sampleY=sess.run(inputDs)
    testInputX,testInputY=sess.run(testInputDs)
    print sampleX,sampleY
    print testInputX,testInputY
    
    # Ask the reader threads to stop and wait for them to shut down cleanly.
    coord.request_stop()
    coord.join(threads) 

(原文此处有运行结果截图,转载时图片未能保留)

猜你喜欢

转载自blog.csdn.net/u010255642/article/details/82953930