Download and install hadoop
Download the prebuilt hadoop-2.5.1 release from Apache
tar -xzf hadoop-2.5.1.tar.gz
cd hadoop-2.5.1
vim etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
vim etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/disk1/dfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/disk1/dfs/datanode</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
vim etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.acl.enable</name>
<value>0</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>localhost</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>8192</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>8192</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
vim etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>/home/disk1/software/hadoop/hadoop-2.5.1/share/hadoop/giraph/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
</configuration>
Link hadoop into /usr/local
ln -s /home/disk1/software/hadoop/hadoop-2.5.1 /usr/local/hadoop
Set environment variables
vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop
export PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin
. /etc/profile
Format Namenode
bin/hdfs namenode -format
Start HDFS and YARN
hadoop-daemon.sh start namenode
hadoop-daemon.sh start datanode
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start nodemanager
Giraph
Download Giraph
git clone https://github.com/apache/giraph.git
Modify pom.xml
modify the profile "hadoop_yarn" (the one selected by -Phadoop_yarn below), removing STATIC_SASL_SYMBOL from munge.symbols
- <munge.symbols>PURE_YARN,STATIC_SASL_SYMBOL</munge.symbols>
+ <munge.symbols>PURE_YARN</munge.symbols>
Compile Giraph
mvn -Phadoop_yarn -Dhadoop.version=2.5.1 clean package -DskipTests
Copy Giraph jar to Mapreduce classpath
mkdir -p /home/disk1/software/hadoop/hadoop-2.5.1/share/hadoop/giraph
cp ./giraph-examples/target/giraph-examples-1.3.0-SNAPSHOT-for-hadoop-2.5.1-jar-with-dependencies.jar /home/disk1/software/hadoop/hadoop-2.5.1/share/hadoop/giraph
cp ./giraph-core/target/giraph-1.3.0-SNAPSHOT-for-hadoop-2.5.1-jar-with-dependencies.jar /home/disk1/software/hadoop/hadoop-2.5.1/share/hadoop/giraph
vim tiny_graph.txt
[0,0,[[1,1],[3,3]]]
[1,0,[[0,1],[2,2],[3,1]]]
[2,0,[[1,2],[4,4]]]
[3,0,[[0,3],[1,1],[4,4]]]
[4,0,[[3,4],[2,4]]]
hadoop fs -mkdir -p /giraph/test
hadoop fs -put tiny_graph.txt /giraph/test
Run the shortest-paths computation
hadoop fs -rm -r /giraph/test/shortestpaths
hadoop jar ./giraph-examples/target/giraph-examples-1.3.0-SNAPSHOT-for-hadoop-2.5.1-jar-with-dependencies.jar org.apache.giraph.GiraphRunner org.apache.giraph.examples.SimpleShortestPathsComputation -vif org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexInputFormat -vip /giraph/test/tiny_graph.txt -vof org.apache.giraph.io.formats.IdWithValueTextOutputFormat -op /giraph/test/shortestpaths -w 1 -ca giraph.SplitMasterWorker=false
The job will exit with success.
Run PageRank
hadoop jar ./giraph-examples/target/giraph-examples-1.3.0-SNAPSHOT-for-hadoop-2.5.1-jar-with-dependencies.jar org.apache.giraph.GiraphRunner \
-D libjars=./giraph-core/target/giraph-1.3.0-SNAPSHOT-for-hadoop-2.5.1-jar-with-dependencies.jar \
org.apache.giraph.examples.PageRankComputation \
-wc org.apache.giraph.examples.RandomWalkWorkerContext \
-vif org.apache.giraph.examples.LongDoubleNullTextInputFormat \
-vip /giraph/test/pagerank/100m \
-vof org.apache.giraph.examples.VertexWithDoubleValueNullEdgeTextOutputFormat \
-op /giraph/test/pagerank-output \
-w 2 \
-ca giraph.zkList=localhost:2181 \
-ca giraph.masterComputeClass=org.apache.giraph.examples.RandomWalkVertexMasterCompute \
-ca giraph.workerContextClass=org.apache.giraph.examples.RandomWalkWorkerContext \
-ca giraph.outEdgesClass=org.apache.giraph.edge.LongNullArrayEdges \
-ca giraph.messageCombinerClass=org.apache.giraph.combiner.DoubleSumMessageCombiner \
-ca org.apache.giraph.examples.RandomWalkComputation.maxSupersteps=2
The format of input file
The first column is the vertex id (one line per vertex); the remaining columns are the vertex ids of its out-edges.
0 1489177 1082020 1561079 548159 1083633
1 1558419 1723200 777065 1796463 600413
2 248796 1978584 376081 2007794 1059900
3 177672 1522680 1880044 2034928 1394061
4 1577684 738625 141693 1584899 1056347
5 790735 1299055 205872 370585 1063957
6 1623007 665048 1076178 391388 1023548
Part of Log
20/06/16 09:37:58 INFO yarn.GiraphYarnClient: Giraph: org.apache.giraph.examples.PageRankComputation, Elapsed: 501.84 secs
20/06/16 09:37:58 INFO yarn.GiraphYarnClient: appattempt_1591786803275_0009_000001, State: RUNNING, Containers used: 2
20/06/16 09:38:02 INFO yarn.GiraphYarnClient: Giraph: org.apache.giraph.examples.PageRankComputation, Elapsed: 505.85 secs
20/06/16 09:38:02 INFO yarn.GiraphYarnClient: appattempt_1591786803275_0009_000001, State: RUNNING, Containers used: 2
20/06/16 09:38:05 INFO yarn.GiraphYarnClient: Cleaning up HDFS distributed cache directory for Giraph job.
20/06/16 09:38:05 INFO yarn.GiraphYarnClient: Completed Giraph: org.apache.giraph.examples.PageRankComputation: SUCCEEDED, total running time: 8 minutes, 27 seconds.
guava compatibility
Hadoop-2.5.1 ships guava 11.0.2 while Giraph uses guava 21.0, so a NoSuchMethodError can occur if Hadoop's guava is loaded first. Removing the unnecessary entries from mapred-site.xml — keeping only the settings listed above — makes the job run correctly.
cd ${HADOOP_HOME}
find . -name guava*
./share/hadoop/yarn/lib/guava-11.0.2.jar
./share/hadoop/hdfs/lib/guava-11.0.2.jar
./share/hadoop/common/lib/guava-11.0.2.jar
./share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/guava-11.0.2.jar
java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkState(ZLjava/lang/String;I)V
at org.apache.giraph.worker.AllWorkersInfo.<init>(AllWorkersInfo.java:54)
at org.apache.giraph.worker.WorkerContext.setupSuperstep(WorkerContext.java:67)
at org.apache.giraph.graph.GraphTaskManager.prepareForSuperstep(GraphTaskManager.java:502)
at org.apache.giraph.graph.GraphTaskManager.execute(GraphTaskManager.java:354)
at org.apache.giraph.yarn.GiraphYarnTask.run(GiraphYarnTask.java:92)
at org.apache.giraph.yarn.GiraphYarnTask.main(GiraphYarnTask.java:184)