此篇blog,承接 《hadoop 2.6 源码 解读之上传文件(-put)内部实现之FsShell init()篇》:https://blog.csdn.net/zhixingheyi_tian/article/details/80499773
入口在 FsShell 的 run() 方法中:
// Ask the command factory for the command object associated with cmd.
instance = commandFactory.getInstance(cmd);
if (instance == null) {
// The factory returned no command for cmd, so it is reported as unknown.
throw new UnknownCommandException();
}
// Run the command with argv[1..argv.length-1], i.e. argv without its first element.
exitCode = instance.run(Arrays.copyOfRange(argv, 1, argv.length));
这里获取的 instance 是 CopyCommands.Put 类对象
instance.run是逻辑主体
跳转至父类Command run方法
关键代码就是以下两行
// First let the command process its options from args ...
processOptions(args);
// ... then pass args on for the actual argument processing.
processRawArguments(args);
processOptions 实现在CopyCommands.Put里
/**
 * Parses the command-line options of this command and applies them.
 *
 * <p>The flags {@code f}, {@code p} and {@code l} are read from the parsed
 * arguments and forwarded to {@code setOverwrite}, {@code setPreserve} and
 * {@code setLazyPersist} respectively. Afterwards the remote destination is
 * resolved from {@code args} and recursion is switched on unconditionally.
 *
 * @param args the argument list handed to the command
 * @throws IOException declared by the method signature
 */
protected void processOptions(LinkedList<String> args) throws IOException {
  // 1 and Integer.MAX_VALUE are presumably the min/max number of
  // positional arguments -- TODO confirm against CommandFormat.
  final CommandFormat format =
      new CommandFormat(1, Integer.MAX_VALUE, "f", "p", "l");
  format.parse(args);

  final boolean forceFlag = format.getOpt("f");
  setOverwrite(forceFlag);

  final boolean preserveFlag = format.getOpt("p");
  setPreserve(preserveFlag);

  final boolean lazyPersistFlag = format.getOpt("l");
  setLazyPersist(lazyPersistFlag);

  getRemoteDestination(args);

  // There is no -r flag (arguably there should be one), so recursion is
  // always enabled.
  setRecursive(true);
}
processRawArguments 最终 跳进 CopyCommands.Put processArguments方法
//CopyCommands.Put
/**
 * Processes the path arguments of the put command.
 *
 * <p>When the only argument is {@code "-"}, {@code System.in} is copied to
 * the target path; in every other case the work is delegated to the parent
 * implementation.
 *
 * @param args the path arguments
 * @throws IOException if the copy or the delegated processing fails
 */
@Override
protected void processArguments(LinkedList<PathData> args)
    throws IOException {
  // NOTE: this logic should be better, mimics previous implementation
  final boolean soleArgumentIsDash =
      args.size() == 1 && args.get(0).toString().equals("-");

  if (!soleArgumentIsDash) {
    // Ordinary case (the one traced in this walkthrough): the stdin branch
    // below is not taken.
    super.processArguments(args);
    return;
  }

  // A lone "-" argument: copy System.in to the target.
  copyStreamToTarget(System.in, getTargetPath(args.get(0)));
}
}
// super.processArguments
// 父类的方法CommandWithDestination
/**
 * Validates the destination {@code dst} against the number of arguments and
 * then delegates to the parent implementation.
 *
 * <ul>
 *   <li>More than one argument: {@code dst} must exist and be a
 *       directory.</li>
 *   <li>Otherwise, if {@code dst} does not exist, its parent must exist.</li>
 *   <li>Otherwise ({@code dst} exists), it must be a directory unless
 *       {@code overwrite} is set.</li>
 * </ul>
 *
 * @param args the path arguments
 * @throws PathNotFoundException if {@code dst} (multi-argument case) or the
 *         parent of {@code dst} (single-argument case) does not exist
 * @throws PathIsNotDirectoryException if several arguments are given and
 *         {@code dst} is not a directory
 * @throws PathExistsException if {@code dst} exists, is not a directory and
 *         {@code overwrite} is not set
 * @throws IOException if the delegated processing fails
 */
@Override
protected void processArguments(LinkedList<PathData> args)
    throws IOException {
  final boolean severalArguments = args.size() > 1;

  if (severalArguments) {
    // Several arguments can only be copied into an existing directory.
    if (!dst.exists) {
      throw new PathNotFoundException(dst.toString());
    }
    if (!dst.stat.isDirectory()) {
      throw new PathIsNotDirectoryException(dst.toString());
    }
  } else if (!dst.exists) {
    // The destination will be created, so its parent has to be there.
    if (!dst.parentExists()) {
      throw new PathNotFoundException(dst.toString());
    }
  } else if (!(dst.stat.isDirectory() || overwrite)) {
    // An existing non-directory destination is only acceptable when
    // overwriting.
    throw new PathExistsException(dst.toString());
  }

  super.processArguments(args);
}
跳转至 Command 的processArguments
/**
 * Processes each argument in turn via {@code processArgument}.
 *
 * <p>An {@link IOException} raised for one argument is passed to
 * {@code displayError}; the loop then continues with the next argument.
 *
 * @param args the path arguments, handled in list order
 * @throws IOException declared by the method signature
 */
protected void processArguments(LinkedList<PathData> args)
    throws IOException {
  for (final PathData pathArgument : args) {
    try {
      processArgument(pathArgument);
    } catch (IOException failure) {
      // Hand the failure to displayError and carry on with the rest.
      displayError(failure);
    }
  }
}
跳转至 Command processArgument
/**
 * Dispatches a single argument depending on whether its path exists.
 *
 * @param item the argument; {@code item.exists} selects the handler
 * @throws IOException if the selected handler fails
 */
protected void processArgument(PathData item) throws IOException {
  if (!item.exists) {
    processNonexistentPath(item);
    return;
  }
  processPathArgument(item);
}
中间省略一些代码跳转环节
最后再跳到这里:
/**
 * Processes a source path by resolving its target with
 * {@code getTargetPath} and delegating to the two-argument
 * {@code processPath}.
 *
 * @param src the source path
 * @throws IOException if resolving the target or the delegated call fails
 */
@Override
protected void processPath(PathData src) throws IOException {
  final PathData resolvedTarget = getTargetPath(src);
  processPath(src, resolvedTarget);
}
点进 processPath(src, target):当源路径是普通文件时,它会调用 copyFileToTarget,如下
//CommandWithDestination
/**
 * Copies the file {@code src} to {@code target}.
 *
 * <p>The source is opened through {@code src.fs}, streamed to the target via
 * {@code copyStreamToTarget}, and {@code preserveAttributes} is invoked
 * afterwards. The input stream is handed to {@code IOUtils.closeStream} in
 * all cases.
 *
 * @param src the source file
 * @param target the destination
 * @throws IOException if opening, copying or attribute preservation fails
 */
protected void copyFileToTarget(PathData src, PathData target)
    throws IOException {
  // Computed before anything is opened; forwarded to preserveAttributes.
  final boolean keepRawXattrs =
      checkPathsForReservedRaw(src.path, target.path);
  src.fs.setVerifyChecksum(verifyChecksum);

  InputStream source = null;
  try {
    source = src.fs.open(src.path);
    copyStreamToTarget(source, target);
    preserveAttributes(src, target, keepRawXattrs);
  } finally {
    // Runs on both the success and the failure path.
    IOUtils.closeStream(source);
  }
}
copyStreamToTarget
//CommandWithDestination
/**
 * Writes the contents of {@code in} to {@code target}.
 *
 * <p>The data is first written to a temporary path (the target with the
 * suffix {@code "._COPYING_"}) and that path is then renamed to the target.
 *
 * @param in the stream to copy from
 * @param target the destination
 * @throws PathExistsException if the target exists and is a directory, or
 *         exists while {@code overwrite} is not set
 * @throws IOException if writing or renaming fails
 */
protected void copyStreamToTarget(InputStream in, PathData target)
    throws IOException {
  if (target.exists) {
    if (target.stat.isDirectory() || !overwrite) {
      throw new PathExistsException(target.toString());
    }
  }

  final TargetFileSystem destinationFs = new TargetFileSystem(target.fs);
  try {
    // If the copy is
    // * successful, the temporary file will be renamed to the real path,
    // * else the temporary file will be deleted.
    final PathData stagingPath = target.suffix("._COPYING_");
    destinationFs.setWriteChecksum(writeChecksum);
    destinationFs.writeStreamToFile(in, stagingPath, lazyPersist);
    destinationFs.rename(stagingPath, target);
  } finally {
    // last ditch effort to ensure temp file is removed
    destinationFs.close();
  }
}
//targetFs.writeStreamToFile 写文件的关键操作
/**
 * Creates {@code target} and copies the contents of {@code in} into it.
 *
 * @param in the stream to read from
 * @param target the file to create
 * @param lazyPersist forwarded unchanged to {@code create}
 * @throws IOException if creating the file or copying fails
 */
void writeStreamToFile(InputStream in, PathData target,
    boolean lazyPersist) throws IOException {
  FSDataOutputStream sink = null;
  try {
    sink = create(target, lazyPersist);
    IOUtils.copyBytes(in, sink, getConf(), true);
  } finally {
    // just in case copyBytes didn't
    IOUtils.closeStream(sink);
  }
}
由此可见,文件的写入最终是在 IOUtils.copyBytes 中完成的,其实就是从输入流到输出流的拷贝。上面调用的是带 Configuration 参数的重载,下面列出的是实际执行拷贝的三参数版本。
每次拷贝都要经过缓冲区,读者可以留意并积累:缓冲区 buf 的默认大小为 4096 字节
/**
 * Copies all bytes from {@code in} to {@code out} through an intermediate
 * buffer of {@code buffSize} bytes. Neither stream is closed by this method.
 *
 * <p>When {@code out} is a {@link PrintStream}, its error flag is checked
 * after every write, because {@code PrintStream} reports failures through
 * {@code checkError()} rather than by throwing.
 *
 * @param in the stream to read from
 * @param out the stream to write to
 * @param buffSize size of the copy buffer in bytes; must be positive
 *        (the surrounding text gives 4096 as the usual default)
 * @throws IOException if reading or writing fails, or if a
 *         {@code PrintStream} destination reports an error
 * @throws IllegalArgumentException if {@code buffSize} is not positive
 */
public static void copyBytes(InputStream in, OutputStream out, int buffSize)
    throws IOException {
  if (buffSize <= 0) {
    // With a zero-length buffer in.read(buf) returns 0 on every call, so the
    // copy loop below would never terminate; reject such sizes up front.
    throw new IllegalArgumentException(
        "Buffer size must be positive: " + buffSize);
  }

  PrintStream ps = out instanceof PrintStream ? (PrintStream) out : null;
  byte[] buf = new byte[buffSize];

  int bytesRead;
  // read() returns -1 at end of stream.
  while ((bytesRead = in.read(buf)) >= 0) {
    out.write(buf, 0, bytesRead);
    if ((ps != null) && ps.checkError()) {
      throw new IOException("Unable to write to output stream.");
    }
  }
}
out.write 最终调用的是 FSDataOutputStream 中的 write 方法:
//FSDataOutputStream
/**
 * Writes {@code len} bytes of {@code b}, starting at {@code off}, to the
 * wrapped stream, then advances {@code position} by {@code len} and, when
 * {@code statistics} is set, records the bytes written.
 *
 * @param b the data
 * @param off start offset in {@code b}
 * @param len number of bytes to write
 * @throws IOException if the underlying write fails; {@code position} and
 *         {@code statistics} are not updated in that case
 */
public void write(byte b[], int off, int len) throws IOException {
  out.write(b, off, len);
  // update position
  position += len;
  if (statistics == null) {
    return;
  }
  statistics.incrementBytesWritten(len);
}
现在回到了写文件流的操作。
至此可以参阅 我的blog 《hadoop 2.6 源码 解读之写操作之总体流程篇》 : https://blog.csdn.net/zhixingheyi_tian/article/details/80313041