thinking in java (二十四) ----- IO之BufferedInputStream

BufferedInputStream介绍

BufferedInputStream是缓冲输入流，作用是为另外一个输入流添加缓冲功能，以及mark reset功能。

本质上，缓冲功能是通过一个内部缓冲区数组实现的，例如在新建某输入流对应的BufferedInputStream后，当我们通过read方法读取输入流的数据时，BufferedInputStream会将输入流的数据分批地填入到缓冲区中，每当缓冲区中的数据被读完以后，BufferedInputStream会再次从输入流中读取数据填充缓冲区，如此反复，直到我们读取完毕输入流数据。

源码分析

package java.io;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

/**
 * An input stream that adds an in-memory buffer, plus mark/reset support,
 * on top of another input stream.
 *
 * <p>Reads are served from an internal byte array; when the array has been
 * consumed, {@code fill()} reads the next chunk from the wrapped stream.
 * All reading methods are {@code synchronized}; {@link #close()} is not, and
 * coordinates with them through a compare-and-set on {@link #buf}.
 */
public class BufferedInputStream extends FilterInputStream {

    /** Buffer capacity, in bytes, used by the single-argument constructor. */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * Largest array length {@code fill()} will try to allocate when it grows
     * the buffer. Growing is capped here so that doubling {@code pos} can
     * never overflow {@code int}.
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /**
     * The buffer array. Set to {@code null} by {@link #close()}, and replaced
     * by a larger array when {@code fill()} has to grow it.
     */
    protected volatile byte[] buf;

    /**
     * Compare-and-set access to {@link #buf}. close() uses it to null the
     * array and fill() uses it to install a grown array, so each can detect
     * that the other changed the field in the meantime.
     */
    private static final
        AtomicReferenceFieldUpdater<BufferedInputStream, byte[]> bufUpdater =
        AtomicReferenceFieldUpdater.newUpdater
        (BufferedInputStream.class,  byte[].class, "buf");

    /**
     * Index one past the last valid byte in the buffer. This counts bytes in
     * the buffer, not bytes in the wrapped stream.
     */
    protected int count;

    /**
     * Index of the next byte to hand out from the buffer. This is a position
     * in the buffer, not in the wrapped stream.
     */
    protected int pos;

    /**
     * Buffer index recorded by the last {@link #mark(int)}, or -1 when there
     * is no valid mark. {@link #reset()} moves {@link #pos} back to it.
     */
    protected int markpos = -1;

    /**
     * The {@code readlimit} passed to the last {@link #mark(int)}. fill()
     * never grows the buffer beyond this size; once the buffer is that large
     * and full, the mark is dropped instead.
     */
    protected int marklimit;

    /**
     * Returns the wrapped stream.
     *
     * @throws IOException with message "Stream closed" if it has been nulled by close()
     */
    private InputStream getInIfOpen() throws IOException {
        InputStream input = in;
        if (input == null) {
            throw new IOException("Stream closed");
        }
        return input;
    }

    /**
     * Returns the current buffer array.
     *
     * @throws IOException with message "Stream closed" if it has been nulled by close()
     */
    private byte[] getBufIfOpen() throws IOException {
        byte[] buffer = buf;
        if (buffer == null) {
            throw new IOException("Stream closed");
        }
        return buffer;
    }

    /**
     * Creates a buffered stream over {@code in} with the default buffer size (8192 bytes).
     *
     * @param in the stream to wrap
     */
    public BufferedInputStream(InputStream in) {
        this(in, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates a buffered stream over {@code in} with the given buffer size.
     *
     * @param in   the stream to wrap
     * @param size initial buffer capacity in bytes
     * @throws IllegalArgumentException if {@code size <= 0}
     */
    public BufferedInputStream(InputStream in, int size) {
        super(in);
        if (size <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        buf = new byte[size];
    }

    /**
     * Reads more data from the wrapped stream into the buffer, first making
     * room according to the mark state. Callers invoke it only when the
     * buffered bytes are exhausted ({@code pos >= count}).
     *
     * @throws IOException      if the stream is closed or the wrapped read fails
     * @throws OutOfMemoryError if the buffer would have to grow past MAX_BUFFER_SIZE
     */
    private void fill() throws IOException {
        byte[] buffer = getBufIfOpen();
        if (markpos < 0) {
            // No mark: nothing has to be kept, reuse the buffer from index 0.
            pos = 0;
        } else if (pos >= buffer.length) {
            // A mark is set and the buffer has no free room left.
            if (markpos > 0) {
                // Bytes before the mark are no longer needed: slide the
                // marked region down to the start of the buffer.
                int sz = pos - markpos;
                System.arraycopy(buffer, markpos, buffer, 0, sz);
                pos = sz;
                markpos = 0;
            } else if (buffer.length >= marklimit) {
                // The whole buffer is marked data and it already reached
                // marklimit: give up the mark and drop the contents.
                markpos = -1;
                pos = 0;
            } else if (buffer.length >= MAX_BUFFER_SIZE) {
                throw new OutOfMemoryError("Required array size too large");
            } else {
                // Grow the buffer: double it, without overflowing int and
                // without exceeding marklimit.
                int nsz = (pos <= MAX_BUFFER_SIZE - pos) ? pos * 2 : MAX_BUFFER_SIZE;
                if (nsz > marklimit) {
                    nsz = marklimit;
                }
                byte[] nbuf = new byte[nsz];
                System.arraycopy(buffer, 0, nbuf, 0, pos);
                if (!bufUpdater.compareAndSet(this, buffer, nbuf)) {
                    // The field no longer holds the array we started with;
                    // within this class only close() can cause that.
                    throw new IOException("Stream closed");
                }
                buffer = nbuf;
            }
        }
        // Assume nothing is read; extend count only if the read delivers bytes.
        count = pos;
        int n = getInIfOpen().read(buffer, pos, buffer.length - pos);
        if (n > 0) {
            count = n + pos;
        }
    }

    /**
     * Reads the next byte.
     *
     * @return the byte as a value in 0..255, or -1 if no more data could be buffered
     * @throws IOException if the stream is closed or the wrapped read fails
     */
    @Override
    public synchronized int read() throws IOException {
        if (pos >= count) {
            // Buffer exhausted: try to refill it from the wrapped stream.
            fill();
            if (pos >= count) {
                return -1;
            }
        }
        return getBufIfOpen()[pos++] & 0xff;
    }

    /**
     * Copies bytes into {@code b}, refilling the buffer at most once.
     *
     * @return the number of bytes copied, or the wrapped stream's result when
     *         the buffer is bypassed, or -1 if a refill produced no data
     */
    private int read1(byte[] b, int off, int len) throws IOException {
        int avail = count - pos;
        if (avail <= 0) {
            // Shortcut: when the request is at least as large as the buffer
            // and no mark is set, read straight into the caller's array and
            // skip the intermediate copy through the buffer.
            if (len >= getBufIfOpen().length && markpos < 0) {
                return getInIfOpen().read(b, off, len);
            }
            fill();
            avail = count - pos;
            if (avail <= 0) {
                return -1;
            }
        }
        int cnt = (avail < len) ? avail : len;
        System.arraycopy(getBufIfOpen(), pos, b, off, cnt);
        pos += cnt;
        return cnt;
    }

    /**
     * Reads up to {@code len} bytes into {@code b} starting at {@code off}.
     * Keeps calling {@code read1} until {@code len} bytes were read, a read
     * returns no data, or the wrapped stream reports nothing available.
     *
     * @return the number of bytes read; 0 if {@code len == 0}; otherwise the
     *         first read's result (e.g. -1) if it produced no data
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit in {@code b}
     * @throws IOException               if the stream is closed or the wrapped read fails
     */
    @Override
    public synchronized int read(byte b[], int off, int len)
        throws IOException
    {
        getBufIfOpen(); // Check for closed stream
        // One combined sign test: negative off, negative len, overflow of
        // off + len, or off + len beyond b.length all make the OR negative.
        if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }

        int n = 0;
        for (;;) {
            int nread = read1(b, off + n, len - n);
            if (nread <= 0) {
                return (n == 0) ? nread : n;
            }
            n += nread;
            if (n >= len) {
                return n;
            }
            // if not closed but no bytes available, return
            InputStream input = in;
            if (input != null && input.available() <= 0) {
                return n;
            }
        }
    }

    /**
     * Skips up to {@code n} bytes.
     *
     * @return the number of bytes actually skipped; 0 if {@code n <= 0}
     * @throws IOException if the stream is closed or the wrapped stream fails
     */
    @Override
    public synchronized long skip(long n) throws IOException {
        getBufIfOpen(); // Check for closed stream
        if (n <= 0) {
            return 0;
        }
        long avail = count - pos;

        if (avail <= 0) {
            // If no mark position set then don't keep in buffer
            if (markpos < 0) {
                return getInIfOpen().skip(n);
            }

            // Fill in buffer to save bytes for reset
            fill();
            avail = count - pos;
            if (avail <= 0) {
                return 0;
            }
        }

        long skipped = (avail < n) ? avail : n;
        pos += skipped;
        return skipped;
    }

    /**
     * Returns the number of unread buffered bytes plus the wrapped stream's
     * {@code available()}, capped at {@code Integer.MAX_VALUE}.
     *
     * @throws IOException if the stream is closed or the wrapped stream fails
     */
    @Override
    public synchronized int available() throws IOException {
        int n = count - pos;
        int avail = getInIfOpen().available();
        return n > (Integer.MAX_VALUE - avail)
                    ? Integer.MAX_VALUE
                    : n + avail;
    }

    /**
     * Records the current buffer position as the mark.
     *
     * @param readlimit stored as {@link #marklimit}; bounds how far fill()
     *                  will grow the buffer to keep the mark valid
     */
    @Override
    public synchronized void mark(int readlimit) {
        marklimit = readlimit;
        markpos = pos;
    }

    /**
     * Moves the read position back to the mark.
     *
     * @throws IOException if the stream is closed, or with message
     *                     "Resetting to invalid mark" if there is no valid mark
     */
    @Override
    public synchronized void reset() throws IOException {
        getBufIfOpen(); // Cause exception if closed
        if (markpos < 0) {
            throw new IOException("Resetting to invalid mark");
        }
        pos = markpos;
    }

    /** Always returns {@code true}: this stream supports mark/reset. */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Releases the buffer and closes the wrapped stream. Does nothing if the
     * buffer is already {@code null}.
     *
     * @throws IOException if closing the wrapped stream fails
     */
    @Override
    public void close() throws IOException {
        byte[] buffer;
        while ((buffer = buf) != null) {
            if (bufUpdater.compareAndSet(this, buffer, null)) {
                InputStream input = in;
                in = null;
                if (input != null) {
                    input.close();
                }
                return;
            }
            // Else retry in case a new buf was CASed in fill()
        }
    }
}

要想读懂BufferedInputStream，就首先要理解其思想，BufferedInputStream的作用是为其他输入流提供缓冲功能，创建BufferedInputStream时候，我们构造函数里面会有一个输入流作为参数，BufferedInputStream会将输入数据分批次读取，每次读取一部分到缓存中，操作完这部分缓冲数据以后，再从输入流中读取下一部分数据到缓存中。

为什么需要缓冲呢，因为缓存中的数据实际上是保存在内存中，而原始数据可能保存在硬盘等存储介质中，而我们知道从内存中读取数据的速度是从硬盘中读取数据速度的10倍以上。

那为什么不一次性把数据全部读取到内存中呢？因为数据可能很大，读取的时间会很长，还有就是内存价格昂贵。

下面我们对BufferedInputStream中最重要的方法fill()进行说明：fill（）源码如下

 /**
  * Refills the buffer from the wrapped stream, first making room
  * according to the mark state (discard, compact, drop the mark, or grow).
  */
 private void fill() throws IOException {
        byte[] buffer = getBufIfOpen();
        if (markpos < 0) {
            // No mark: throw away the buffer and start again at index 0.
            pos = 0;
        } else if (pos >= buffer.length) {
            // A mark is set and there is no room left in the buffer.
            if (markpos > 0) {
                // The part before the mark can be thrown away: move the
                // marked bytes down to the start of the buffer.
                int sz = pos - markpos;
                System.arraycopy(buffer, markpos, buffer, 0, sz);
                pos = sz;
                markpos = 0;
            } else if (buffer.length >= marklimit) {
                // Buffer already reached marklimit: invalidate the mark
                // and drop the buffer contents.
                markpos = -1;
                pos = 0;
            } else if (buffer.length >= MAX_BUFFER_SIZE) {
                throw new OutOfMemoryError("Required array size too large");
            } else {
                // Grow the buffer: double it (capped at MAX_BUFFER_SIZE so
                // the doubling cannot overflow), but never past marklimit.
                int doubled = (pos <= MAX_BUFFER_SIZE - pos)
                        ? pos * 2
                        : MAX_BUFFER_SIZE;
                int nsz = Math.min(doubled, marklimit);
                byte[] nbuf = new byte[nsz];
                System.arraycopy(buffer, 0, nbuf, 0, pos);
                boolean swapped = bufUpdater.compareAndSet(this, buffer, nbuf);
                if (!swapped) {
                    // Can't replace buf if there was an async close.
                    // Note: This would need to be changed if fill()
                    // is ever made accessible to multiple threads.
                    // But for now, the only way CAS can fail is via close.
                    // assert buf == null;
                    throw new IOException("Stream closed");
                }
                buffer = nbuf;
            }
        }
        // Assume nothing is read; extend count only if bytes arrive.
        count = pos;
        int n = getInIfOpen().read(buffer, pos, buffer.length - pos);
        if (n > 0) {
            count = n + pos;
        }
    }

根据源码中的if条件，我们可以将fill的情况分为五种，

情况1：读取完buffer中的数据，并且buffer没有被标记

执行流程如下

（1）read()函数中调用fill()

（2）fill（）中的if(markpos<0)...

为了方便分析，我们将这种情况下的fill等价于以下代码

/** Simplified view of fill() for case 1: the buffer is exhausted and no mark is set. */
private void fill() throws IOException {
    byte[] buffer = getBufIfOpen();
    if (markpos < 0) {
        // No mark: nothing has to be kept, refill from index 0.
        pos = 0;
    }

    // Assume nothing is read; extend count only if bytes arrive.
    count = pos;
    int room = buffer.length - pos;
    int n = getInIfOpen().read(buffer, pos, room);
    if (n > 0) {
        count = n + pos;
    }
}

说明：这种情形发生的情况是-----输入流中有很长的数据，我们每次从中取出一部分到buffer中，每次我们读取完buffer中的数据之后，并且此时输入流没有被标记，那么就接着从输入流中读取下一部分的数据到buffer中。

其中判断是否读完是通过if(pos>=count)判断的，判断输入流有没有被标记是通过if(markpos<0)判断的。

然后我们在捋一捋fill的代码

1，if(markpos < 0)，它的作用是判断“输入流是否被标记”，如果被标记了，则markpos>=0，否则markpos等于-1

2，在这种情况下，通过getInIfOpen（）获取输入流，接着从输入流中读取buffer.length个字节到buffer中

3，count=n+pos；这是根据从输入流中读取实际数据的多少，来更新buffer中数据的实际大小

情况2，读取完buffer中的数据，buffer的标记位置>0，并且buffer中没有多余的空间

执行流程如下，

1，read（）函数中调用fill（）

2，fill（）中的else if（pos >=buffer.length）...

3，fill（）中的if（markpos > 0）...

这种情况下的fill等价于下面的代码

/**
 * Simplified view of fill() for case 2: a mark is set at an index greater
 * than 0 and the buffer has no free room left.
 */
private void fill() throws IOException {
    byte[] buffer = getBufIfOpen();
    // "markpos >= 0" combined with "markpos > 0" reduces to "markpos > 0".
    if (markpos > 0 && pos >= buffer.length) {
        // Keep only the bytes from the mark onwards, moved to index 0.
        int sz = pos - markpos;
        System.arraycopy(buffer, markpos, buffer, 0, sz);
        pos = sz;
        markpos = 0;
    }

    // Assume nothing is read; extend count only if bytes arrive.
    count = pos;
    int room = buffer.length - pos;
    int n = getInIfOpen().read(buffer, pos, room);
    if (n > 0) {
        count = n + pos;
    }
}

说明：这种情况发生的情况是-----输入流中还有很长的数据，我们每次从中读取一部分到buffer中进行操作，当我们读取完buffer中的数据之后，并且此时输入流存在标记时，那么就发生情况2，此时我们要保留“被标记位置”到buffer末尾的数据，然后再从输入流读取下一部分数据到buffer中。

其中判断是否读完buffer中的数据，通过if(pos >= count)判断，

判断输入流是否被标记，通过if(markpos<0)来判断

判断buffer中是否有多余的空间，通过if(pos >=buffer.length)来判断

理解这个思想以后，我们再分析代码

1， int sz = pos - markpos; 作用是“获取‘被标记位置’到‘buffer末尾’”的数据长度。

2，System.arraycopy(buffer, markpos, buffer, 0, sz); 作用是“将buffer中从markpos开始的数据”拷贝到buffer中(从位置0开始填充，填充长度是sz)。接着，将sz赋值给pos，即pos就是“被标记位置”到“buffer末尾”的数据长度。

3， int n = getInIfOpen().read(buffer, pos, buffer.length - pos); 从输入流中读取出“buffer.length - pos”的数据，然后填充到buffer中。

4，通过第2步和第3步组合起来的buffer，就是包含了“原始buffer被标记位置到buffer末尾”的数据，也包含了“从输入流中新读取的数据”。

注意：执行过第2步以后，markpos的值由大于0变成了等于0

情况3，读取完buffer中的数据，buffer被标记位置=0，buffer中没有多余的空间，并且buffer.length>=marklimit。

执行流程如下，
(01) read() 函数中调用 fill()
(02) fill() 中的 else if (pos >= buffer.length) ...
(03) fill() 中的 else if (buffer.length >= marklimit) ...

为了方便分析，我们将这种情况下fill()执行的操作等价于以下代码：

/**
 * Simplified view of fill() for case 3: the mark is at index 0, the buffer
 * is full, and the buffer is already at least marklimit bytes long.
 */
private void fill() throws IOException {
    byte[] buffer = getBufIfOpen();
    // "markpos >= 0" combined with "markpos <= 0" reduces to "markpos == 0".
    boolean bufferFull = pos >= buffer.length;
    if (markpos == 0 && bufferFull && buffer.length >= marklimit) {
        markpos = -1;   // buffer reached marklimit: invalidate the mark
        pos = 0;        // drop buffer contents
    }

    // Assume nothing is read; extend count only if bytes arrive.
    count = pos;
    int room = buffer.length - pos;
    int n = getInIfOpen().read(buffer, pos, room);
    if (n > 0) {
        count = n + pos;
    }
}

说明：这种情况处理非常简单，首先就是“取消标记”，即markpos = -1，然后设置初始化位置为0，即pos=0，最后再从输入流中读取下一部分数据到buffer中

情况4，读取完buffer中的数据，buffer被标记位置=0，buffer中没有多余的空间，并且buffer.length<marklimit

执行流程如下，
(01) read() 函数中调用 fill()
(02) fill() 中的 else if (pos >= buffer.length) ...
(03) fill() 中的 else { int nsz = pos * 2; ... }

为了方便分析，我们将这种情况下fill()执行的操作等价于以下代码：

/**
 * Simplified view of fill() for case 4: the mark is at index 0, the buffer
 * is full, and the buffer is still shorter than marklimit, so it is grown.
 */
private void fill() throws IOException {
    byte[] buffer = getBufIfOpen();
    // "markpos >= 0" combined with "markpos <= 0" reduces to "markpos == 0".
    boolean bufferFull = pos >= buffer.length;
    if (markpos == 0 && bufferFull && buffer.length < marklimit) {
        // New size is the smaller of pos * 2 and marklimit.
        int nsz = Math.min(pos * 2, marklimit);
        byte[] nbuf = new byte[nsz];
        System.arraycopy(buffer, 0, nbuf, 0, pos);
        boolean swapped = bufUpdater.compareAndSet(this, buffer, nbuf);
        if (!swapped) {
            throw new IOException("Stream closed");
        }
        buffer = nbuf;
    }

    // Assume nothing is read; extend count only if bytes arrive.
    count = pos;
    int room = buffer.length - pos;
    int n = getInIfOpen().read(buffer, pos, room);
    if (n > 0) {
        count = n + pos;
    }
}

说明：

这种情况的处理非常简单。
(01) 新建一个字节数组nbuf。nbuf的大小是“pos*2”和“marklimit”中较小的那个数。

int nsz = pos * 2;
if (nsz > marklimit)
    nsz = marklimit;
byte nbuf[] = new byte[nsz];

(02) 接着，将buffer中的数据拷贝到新数组nbuf中。通过System.arraycopy(buffer, 0, nbuf, 0, pos)
(03) 最后，从输入流读取部分新数据到buffer中。通过getInIfOpen().read(buffer, pos, buffer.length - pos);
注意：在这里，我们思考一个问题，“为什么需要marklimit，它的存在到底有什么意义？”我们结合“情况2”、“情况3”、“情况4”的情况来分析。

假设，marklimit是无限大的，而且我们设置了markpos。当我们从输入流中每读完一部分数据并读取下一部分数据时，都需要保存markpos所标记的数据；这就意味着，我们需要不断执行情况4中的操作，要将buffer的容量扩大……随着读取次数的增多，buffer会越来越大；这会导致我们占据的内存越来越大。所以，我们需要给出一个marklimit；当buffer.length>=marklimit时，就不再保存markpos的值了（即标记失效，对应情况3）。

情况5：除了上面四种情况外的情况
执行流程如下，
(01) read() 函数中调用 fill()
(02) fill() 中的 count = pos...

为了方便分析，我们将这种情况下fill()执行的操作等价于以下代码：
```
/**
 * Simplified view of fill() for case 5: none of the room-making branches
 * apply, so new data is simply appended after the current position.
 */
private void fill() throws IOException {
    byte[] buffer = getBufIfOpen();

    // Assume nothing is read; extend count only if bytes arrive.
    count = pos;
    int room = buffer.length - pos;
    int n = getInIfOpen().read(buffer, pos, room);
    if (n > 0) {
        count = n + pos;
    }
}
```
说明：这种情况的处理非常简单。直接从输入流读取部分新数据到buffer中。

示例代码

关于BufferedInputStream中API的详细用法，参考示例代码(BufferedInputStreamTest.java)：

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.lang.SecurityException;

/**
 * BufferedInputStream 测试程序
 *
 * @author skywang
 */
public class BufferedInputStreamTest {

    /** Number of bytes read in each step of the demo. */
    private static final int LEN = 5;

    public static void main(String[] args) {
        testBufferedInputStream() ;
    }

    /**
     * Exercises the BufferedInputStream API (read, markSupported, mark, skip,
     * reset) against the file "bufferedinputstream.txt" in the working
     * directory and prints what was read. I/O and security errors are
     * reported with a stack trace rather than propagated.
     */
    private static void testBufferedInputStream() {
        File file = new File("bufferedinputstream.txt");
        // try-with-resources closes the stream on every exit path, including
        // the early return below and any exception.
        try (InputStream in =
                  new BufferedInputStream(
                      new FileInputStream(file), 512)) {

            // Read up to LEN single bytes. read() returns -1 at end of
            // stream, so stop there instead of printing it as 0xffffffff.
            for (int i = 0; i < LEN; i++) {
                int tmp = in.read();
                if (tmp == -1) {
                    break;
                }
                System.out.printf("%d : 0x%s\n", i, Integer.toHexString(tmp));
            }

            // Nothing more to demonstrate if the stream cannot mark/reset.
            if (!in.markSupported()) {
                System.out.println("mark not supported!");
                return ;
            }

            // Remember the current position; 1024 is the readlimit (marklimit).
            in.mark(1024);

            // Skip 22 bytes. skip() may skip fewer than requested; this demo
            // does not retry.
            in.skip(22);

            // Read the next LEN bytes after the skipped region.
            String str1 = readText(in);
            System.out.printf("str1=%s\n", str1);

            // Go back to the marked position and read LEN bytes from there.
            in.reset();
            String str2 = readText(in);
            System.out.printf("str2=%s\n", str2);
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads up to LEN bytes with a single read call and decodes only the
     * bytes actually read as UTF-8; returns "" when the read returns -1.
     */
    private static String readText(InputStream in) throws IOException {
        byte[] buf = new byte[LEN];
        int n = in.read(buf, 0, LEN);
        int valid = Math.max(n, 0);
        return new String(buf, 0, valid, java.nio.charset.StandardCharsets.UTF_8);
    }
}

程序中读取的bufferedinputstream.txt的内容如下：

abcdefghijklmnopqrstuvwxyz
0123456789
ABCDEFGHIJKLMNOPQRSTUVWXYZ

运行结果：

0 : 0x61
1 : 0x62
2 : 0x63
3 : 0x64
4 : 0x65
str1=01234
str2=fghij

原文：http://www.cnblogs.com/skywang12345/p/io_12.html