ConnectionRefusedError when calling tf.keras.datasets.mnist.load_data()


When keras.datasets downloads a dataset, the files are fetched from Amazon's servers, so running the code often fails with the following error:

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
---------------------------------------------------------------------------
ConnectionRefusedError                    Traceback (most recent call last)
...
Exception: URL fetch failure on https://s3.amazonaws.com/img-datasets/mnist.npz: None -- [Errno 111] Connection refused

My solution: my browser has a VPN configured, so I opened the URL above in the browser, downloaded the file, and placed it in the ~/.keras/datasets directory.
Reasoning: the error message shows that the download fails because Amazon's server cannot be reached. My machine runs Ubuntu and the VPN only works in the browser, so I first downloaded the file with the browser. At that point you can parse the file yourself, following the keras source code:

import numpy as np

# Parse the manually downloaded file the same way
# keras.datasets.mnist.load_data() does internally.
path = 'mnist.npz'  # wherever the browser saved the file
f = np.load(path)
x_train, y_train = f['x_train'], f['y_train']
x_test, y_test = f['x_test'], f['y_test']
f.close()
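
For mnist.npz the four keys above are present; as a quick sanity check that the download is complete (these shapes are the standard MNIST split):

print(x_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
print(x_test.shape, y_test.shape)    # (10000, 28, 28) (10000,)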

Most solutions you find online stop here, but every dataset is parsed slightly differently, so you would have to rewrite the parsing code each time, which is tedious. So I looked at the get_file method that load_data calls; it is the method that actually downloads the file:

@tf_export('keras.utils.get_file')
def get_file(fname,
             origin,
             untar=False,
             md5_hash=None,
             file_hash=None,
             cache_subdir='datasets',
             hash_algorithm='auto',
             extract=False,
             archive_format='auto',
             cache_dir=None):
  if cache_dir is None:
    cache_dir = os.path.join(os.path.expanduser('~'), '.keras')
  if md5_hash is not None and file_hash is None:
    file_hash = md5_hash
    hash_algorithm = 'md5'
  datadir_base = os.path.expanduser(cache_dir)
  if not os.access(datadir_base, os.W_OK):
    datadir_base = os.path.join('/tmp', '.keras')
  datadir = os.path.join(datadir_base, cache_subdir)
  if not os.path.exists(datadir):
    os.makedirs(datadir)

  if untar:
    untar_fpath = os.path.join(datadir, fname)
    fpath = untar_fpath + '.tar.gz'
  else:
    fpath = os.path.join(datadir, fname)

  download = False
  if os.path.exists(fpath):
    # File found; verify integrity if a hash was provided.
    if file_hash is not None:
      if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
        print('A local file was found, but it seems to be '
              'incomplete or outdated because the ' + hash_algorithm +
              ' file hash does not match the original value of ' + file_hash +
              ' so we will re-download the data.')
        download = True
  else:
    download = True

  if download:
    print('Downloading data from', origin)

    class ProgressTracker(object):
      # Maintain progbar for the lifetime of download.
      # This design was chosen for Python 2.7 compatibility.
      progbar = None

    def dl_progress(count, block_size, total_size):
      if ProgressTracker.progbar is None:
        if total_size is -1:
          total_size = None
        ProgressTracker.progbar = Progbar(total_size)
      else:
        ProgressTracker.progbar.update(count * block_size)

    error_msg = 'URL fetch failure on {}: {} -- {}'
    try:
      try:
        urlretrieve(origin, fpath, dl_progress)
      except URLError as e:
        raise Exception(error_msg.format(origin, e.errno, e.reason))
      except HTTPError as e:
        raise Exception(error_msg.format(origin, e.code, e.msg))
    except (Exception, KeyboardInterrupt) as e:
      if os.path.exists(fpath):
        os.remove(fpath)
      raise
    ProgressTracker.progbar = None

  if untar:
    if not os.path.exists(untar_fpath):
      _extract_archive(fpath, datadir, archive_format='tar')
    return untar_fpath

  if extract:
    _extract_archive(fpath, datadir, archive_format)

  return fpath

From this part of it:

  download = False
  if os.path.exists(fpath):
    # File found; verify integrity if a hash was provided.
    if file_hash is not None:
      if not validate_file(fpath, file_hash, algorithm=hash_algorithm):
        print('A local file was found, but it seems to be '
              'incomplete or outdated because the ' + hash_algorithm +
              ' file hash does not match the original value of ' + file_hash +
              ' so we will re-download the data.')
        download = True
  else:
    download = True

we can see that if the file already exists and either no hash was given or the hash check passes, it is not downloaded again. This also means that if your browser download is incomplete, get_file will detect the hash mismatch and try to re-download, hitting the same connection error; you can verify the file yourself first, as in the sketch below.
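A minimal sketch of the same MD5 check that validate_file performs (the expected hash for mnist.npz is hard-coded in your local load_data source and varies by version, so it is not reproduced here):

import hashlib

def md5sum(path, chunk_size=65536):
    # Hash the file in chunks, as validate_file does.
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()

print(md5sum('mnist.npz'))  # compare against the file_hash in load_data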
Looking back at the code above, cache_dir defaults to ~/.keras and cache_subdir defaults to 'datasets', so the lookup path is ~/.keras/datasets; we only need to copy the downloaded file into that directory, and no code needs to change afterwards. (Note also that if ~/.keras is not writable, the code falls back to /tmp/.keras, so check that directory if the copy seems to be ignored.)
This uses the mnist dataset as the example, but other datasets work the same way.
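
Putting it together, a minimal sketch (src_path is wherever your browser saved the file, which is an assumption about your setup; the cached filename mnist.npz matches what load_data passes to get_file):

import os
import shutil
import tensorflow as tf

src_path = '/home/me/Downloads/mnist.npz'  # hypothetical download location
datadir = os.path.join(os.path.expanduser('~'), '.keras', 'datasets')
os.makedirs(datadir, exist_ok=True)  # same layout that get_file creates
shutil.copy(src_path, datadir)

# get_file now finds the cached file, the hash check passes,
# and nothing is downloaded.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()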
