Compiling and Installing the MySQL Applier for Hadoop

        Applier (the MySQL Applier for Hadoop) is a tool released by MySQL for real-time replication of MySQL data into HDFS; it works equally well for real-time synchronization of data into Hive. The official introduction is available at: http://dev.mysql.com/tech-resources/articles/mysql-hadoop-applier.html


Environment:

        The current Applier release is 0.1.0-alpha, which only supports MySQL 5.6 and later. Required environment:
  • Hadoop 1.0.4: append mode must be enabled (set dfs.support.append to true in hdfs-site.xml; see the snippet after this list)
  • libhdfs (it comes precompiled with Hadoop distros, ${HADOOP_HOME}/libhdfs/libhdfs.so)
  • cmake 2.6 or greater
  • libmysqlclient 5.6
  • gcc 4.6.3
  • MySQL Server 5.6
  • FindHDFS.cmake (cmake file to find libhdfs library while compiling. You can get a copy online)
  • FindJNI.cmake (optional, check if you already have one: $locate FindJNI.cmake)
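
The append setting mentioned in the first item above goes into hdfs-site.xml. A minimal sketch for Hadoop 1.x:
<configuration>
  <property>
    <name>dfs.support.append</name>
    <value>true</value>
  </property>
</configuration>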



Related documentation:

        A brief walkthrough of the Applier's implementation, installation, and usage: http://innovating-technology.blogspot.com/2013/04/mysql-hadoop-applier-part-2.html
        Demo video: http://www.youtube.com/watch?v=mZRAtCu3M1g&feature=youtu.be

Compilation and Deployment:

libhdfs

        Run the following command under ${HADOOP_HOME} to compile libhdfs:
ant compile-c++-libhdfs -Dislibhdfs=true

The libhdfs library files are generated under ${HADOOP_HOME}/build/c++/Linux-amd64-64/lib.
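
A quick way to confirm that the build produced the shared library (the path assumes a 64-bit Linux build; adjust for your platform):
ls ${HADOOP_HOME}/build/c++/Linux-amd64-64/lib
# libhdfs.so (and its versioned variants) should be listed here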

Test:
        The following is the example provided in the official Hadoop documentation:
#include "hdfs.h" 

int main(int argc, char **argv) {

    //default是host,自行更改
    hdfsFS fs = hdfsConnect("default", 0);
    const char* writePath = "/tmp/testfile.txt";
    hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
    if(!writeFile) {
          fprintf(stderr, "Failed to open %s for writing!\n", writePath);
          exit(-1);
    }
    char* buffer = "Hello, World!";
    tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
    if (hdfsFlush(fs, writeFile)) {
           fprintf(stderr, "Failed to 'flush' %s\n", writePath); 
          exit(-1);
    }
   hdfsCloseFile(fs, writeFile);
}


        During compilation some libraries may not be found and their paths must be specified explicitly, mainly libhdfs.so and Java's libjvm.so:
gcc testHDFS.c  -I${HADOOP_HOME}/src/c++/libhdfs -I${JAVA_HOME}/include -I${JAVA_HOME}/include/linux -L${HADOOP_HOME}/c++/Linux-amd64-64/lib/ -lhdfs -L${JAVA_HOME}/jre/lib/amd64/server -ljvm -o testHDFS

        At runtime you may encounter errors like:
error while loading shared libraries: libhdfs.so.0: cannot open shared object file: No such file or directory
or
error while loading shared libraries: libjvm.so: cannot open shared object file: No such file or directory

        You need to set LD_LIBRARY_PATH:
export LD_LIBRARY_PATH=${HADOOP_HOME}/c++/Linux-amd64-64/lib:${JAVA_HOME}/jre/lib/amd64/server:$LD_LIBRARY_PATH

        After a successful run, a testfile.txt file is created under /tmp in HDFS containing Hello, World!
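
You can verify the write from the HDFS command line (standard Hadoop shell command; note the example writes strlen(buffer)+1 bytes, so the file also ends with a trailing null byte):
hadoop fs -cat /tmp/testfile.txt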

MySQL 5.6

        MySQL 5.6 download: http://dev.mysql.com/get/Downloads/MySQL-5.6/mysql-5.6.16.tar.gz
        Install it following the official documentation:
groupadd mysql
useradd -r -g mysql mysql
# Beginning of source-build specific instructions
tar zxvf mysql-VERSION.tar.gz
cd mysql-VERSION
cmake .
make
make install
# End of source-build specific instructions
# Postinstallation setup
cd /usr/local/mysql
chown -R mysql .
chgrp -R mysql .
scripts/mysql_install_db --user=mysql
chown -R root . 
chown -R mysql data
bin/mysqld_safe --user=mysql &
# Next command is optional
cp support-files/mysql.server /etc/init.d/mysql.server


Applier

        Applier download: http://downloads.mysql.com/snapshots/pb/hadoop-applier/mysql-hadoop-applier-0.1.0-alpha.tar.gz

        The Applier uses cmake to generate its Makefiles and depends on FindHDFS.cmake and FindJNI.cmake, which are not included in the package and must be downloaded separately; see: http://code.ohloh.net/project?pid=aj2Ue-0Ech4&prevcid=1&did=cmake_modules&cid=IzFytetI1gk&fp=392579&mp&projSelected=true&filterChecked
        Place them in cmake's Modules directory.
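
A sketch of the two usual options (the Modules path and ~/cmake_modules below are assumptions; adjust for your cmake installation):
# Option 1: copy the modules into cmake's own Modules directory
cp FindHDFS.cmake FindJNI.cmake /usr/share/cmake/Modules/
# Option 2: keep them in a local directory and point cmake at it via -DCMAKE_MODULE_PATH (see the build step below)
mkdir -p ~/cmake_modules && cp FindHDFS.cmake FindJNI.cmake ~/cmake_modules/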

        Note that Hadoop 1.x moved the header and library directories slightly compared with earlier versions, so compiling directly may fail to find the relevant files. The Applier provides a patch for FindHDFS.cmake:
--- FindHDFS.cmake
+++ FindHDFS.cmake
@@ -11,6 +11,7 @@ exec_program(hadoop ARGS version OUTPUT_VARIABLE Hadoop_VERSION
 # currently only looking in HADOOP_HOME
 find_path(HDFS_INCLUDE_DIR hdfs.h PATHS
   $ENV{HADOOP_HOME}/include/
+  $ENV{HADOOP_HOME}/src/c++/libhdfs/
   # make sure we don't accidentally pick up a different version
   NO_DEFAULT_PATH
 )
@@ -26,9 +27,9 @@ endif()
 message(STATUS "Architecture: ${arch_hint}")

 if ("${arch_hint}" STREQUAL "x64")
-  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native)
+  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/c++/Linux-amd64-64/lib)
 else ()
-  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native)
+  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/c++/Linux-i386-32/lib)
 endif ()

 message(STATUS "HDFS_LIB_PATHS: ${HDFS_LIB_PATHS}")

        Save the patch as FindHDFS.patch and run patch -p0 < FindHDFS.patch in the directory containing FindHDFS.cmake.

Compiling libreplication
        Run the following commands in the Applier directory to compile it:
export MYSQL_DIR=<path of mysql directory or libmysql>
mkdir build
cd build
cmake .. -DCMAKE_MODULE_PATH:String=<path of cmake Modules>
make -j4

Note:
The Applier depends on MySQL 5.6. MySQL 5.6 changed the parameter types of some interfaces compared with earlier versions, and using headers from an older version will produce type-mismatch errors.
Make sure the headers under /usr/local/mysql/include are from version 5.6.
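
A quick way to check which release the installed headers belong to (assuming MySQL was installed under /usr/local/mysql as above):
grep MYSQL_SERVER_VERSION /usr/local/mysql/include/mysql_version.h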


After a successful build, libreplication.so is generated in the build/lib directory.
Next, build the mysql2hdfs example from examples.
Note: do not build inside the example's source directory; simply run make in the build/examples/mysql2hdfs directory, as shown below.
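
A minimal sketch of that step (paths relative to the Applier source root used above):
cd build/examples/mysql2hdfs
make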

The following errors may appear during compilation:

/usr/bin/ld: warning: libmawt.so, needed by /home/supertool/jdk1.7.0_09/jre/lib/amd64/libjawt.so, not found (try using -rpath or -rpath-link)
/home/supertool/jdk1.7.0_09/jre/lib/amd64/libjawt.so: undefined reference to `awt_FreeDrawingSurface@SUNWprivate_1.1'
/home/supertool/jdk1.7.0_09/jre/lib/amd64/libjawt.so: undefined reference to `awt_Unlock@SUNWprivate_1.1'
/home/supertool/jdk1.7.0_09/jre/lib/amd64/libjawt.so: undefined reference to `awt_Lock@SUNWprivate_1.1'
/home/supertool/jdk1.7.0_09/jre/lib/amd64/libjawt.so: undefined reference to `awt_GetComponent@SUNWprivate_1.1'
/home/supertool/jdk1.7.0_09/jre/lib/amd64/libjawt.so: undefined reference to `awt_GetDrawingSurface@SUNWprivate_1.1'
collect2: ld returned 1 exit status
make[2]: *** [examples/mysql2hdfs/happlier] Error 1
make[1]: *** [examples/mysql2hdfs/CMakeFiles/happlier.dir/all] Error 2
make: *** [all] Error 2

        Add the directory containing libmawt.so to LD_LIBRARY_PATH:
export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/amd64/xawt:${LD_LIBRARY_PATH}

       Note: there is also a libmawt.so under ${JAVA_HOME}/jre/lib/amd64/headless; be sure to use the one in the xawt directory, otherwise the error persists.
       After compilation, an executable named happlier is produced.

       Start the service with the following command:
./happlier [options] mysql://<user>@<host>:13000 hdfs://localhost:9000

       Adjust the parameters for your environment.
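
As a rough end-to-end check (the database, table, and credentials below are purely illustrative), issue an INSERT on the MySQL side and then look for newly created data files in HDFS:
mysql -h 127.0.0.1 -P 13000 -u root -e "CREATE DATABASE IF NOT EXISTS demo; CREATE TABLE demo.t1 (i INT); INSERT INTO demo.t1 VALUES (1);"
# the directory layout below is an assumption; check happlier's output and options for the actual target path
hadoop fs -lsr / | grep demo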

Summary
  • The current Applier release is still the 0.1.0-alpha version from a year ago; its main purpose is to demonstrate the feasibility of streaming data to HDFS in real time from the MySQL binlog.
  • MySQL 5.6 is required; versions before 5.6 are not supported, which is a significant limitation.
  • The installation process is quite involved and depends heavily on the underlying environment; gcc and cmake both have version requirements (usually satisfiable), and some files must be added manually and patched.
  • Only two examples are provided, suitable for testing only.
  • Most of what Google turns up concerns problems hit during compilation; information about actual usage is hard to find.
  • Most importantly: only INSERT is currently supported; DELETE, UPDATE, and DDL statements are not.


Reposted from paddy-w.iteye.com/blog/2023656