Hadoop big data platform is manually built-hadoop

1. Download list (use FileZilla to transfer files between Windows and Linux):
  jdk-7u79-linux-x64.tar.gz
  apache-maven-3.3.9-bin.tar.gz
  hadoop-2.6.0-cdh5.8.0.tar.gz
  hadoop-native-64-2.6.0.tar
  hbase-1.2.0-cdh5.8.0.tar.gz
  hive-1.1.0-cdh5.8.0.tar.gz
  hue-3.9.0-cdh5.8.0.tar.gz
  scala-2.10.4.gz
  spark-1.6.0-cdh5.8.0.tar
  sqoop-1.4.6-cdh5.8.0.tar.gz
2. Install jdk (root)
a. cd /usr/
  mkdir java
  tar -zxvf jdk-7u79-linux-x64.tar.gz
  Configure environment variables:
  add at the end of the /etc/profile file: For this file, each logged in user can load the environment variables.
  export JAVA_HOME=/usr/java/jdk1.7.0_79
  export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
  source /etc/profile #Environment variables take effect immediately
 
  b. Verify the installation:
    java -version

  c. The end looks like this.




3. Install hadoop. Log in as root and give the hadoop user ownership of the /opt folder:
  chown -R hadoop /opt
 
  tar -zxvf hadoop-2.6.0-cdh5.8.0.tar.gz

  a. Modify /opt/hadoop-2.6.0-cdh5.8.0/etc/hadoop/hadoop-env.sh: add "export JAVA_HOME=/usr/java/jdk1.7.0_79" at the end.
  b. In the /opt/hadoop-2.6.0-cdh5.8.0/etc/hadoop directory, add core-site.xml:

    <?xml version="1.0" encoding="UTF- 8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop-2.6.0-cdh5.8.0/tmp</value>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
  <name>hadoop.proxyuser.hadoop.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>

c. Modify hdfs-site.xml

  <?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>
dfs.replication
</name>
<value>
3
</value>
</property>
<property>
<name>
dfs.namenode.name.dir
</name>
<value>
file:/opt/hdfs/name
</value>
</property>
<property>
<name>
dfs.namenode.edits.dir
</name>
<value>
file:/opt/hdfs/nameedit
</value>
</property>

<property>
<name>
dfs.datanode.data.dir
</name>
<value>
file:/opt/hdfs/data
</value>
</property>
<property>
<name>dfs.namenode.rpc-address</name>
<value>master:9000</value>
</property>
<property>
<name>dfs.http.address</name><value>master:50070</value></property>
<property><name>dfs.namenode.secondary.http-address</name><value>master:50090</value></property>
<property><name>dfs.webhdfs.enabled</name><value>true</value></property>
<property><name>dfs.permissions</name><value>false</value></property>
<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>
</configuration>

d. Modify mapred-site.xml

  <?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property><name>mapreduce.framework.name</name><value>yarn</value></property>
<property><name>mapreduce.jobhistory.address</name><value>slave2:10020</value></property>
<property><name>mapreduce.jobhistory.webapp.address</name><value>slave2:19888</value></property>
</configuration>

e. Modify yarn-site.xml

  <?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8080</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8082</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>


















Configure environment variables in /etc/profile
export HADOOP_HOME=/opt/hadoop-2.6.0-cdh5.8.0/
export PATH=$PATH:/opt/hadoop-2.6.0-cdh5.8.0/bin

h

before the first startup HDFS needs to be formatted.

/opt/hadoop-2.6.0-cdh5.8.0/bin/hadoop namenode -format

Follow the prompt to enter Y.

i start and verify that
the jps command is to view the java-related processes and process names

/opt/hadoop-2.6.0-cdh5.8.0/sbin/start-all.sh

[hadoop@master ~]$ /opt/hadoop-2.6.0-cdh5.8.0/sbin/start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [master]
master: starting namenode, logging to /opt/hadoop-2.6.0-cdh5.8.0/logs/hadoop-hadoop-namenode-master.out
slave2: starting datanode, logging to /opt/hadoop-2.6.0-cdh5.8.0/logs/hadoop-hadoop-datanode-slave2.out
slave1: starting datanode, logging to /opt/hadoop-2.6.0-cdh5.8.0/logs/hadoop-hadoop-datanode-slave1.out
Starting secondary namenodes [master]
master: starting secondarynamenode, logging to /opt/hadoop-2.6.0-cdh5.8.0/logs/hadoop-hadoop-secondarynamenode-master.out
starting yarn daemons
starting resourcemanager, logging to /opt/hadoop-2.6.0-cdh5.8.0/logs/yarn-hadoop-resourcemanager-master.out
slave2: starting nodemanager, logging to /opt/hadoop-2.6.0-cdh5.8.0/logs/yarn-hadoop-nodemanager-slave2.out
slave1: starting nodemanager, logging to /opt/hadoop-2.6.0-cdh5.8.0/logs/yarn-hadoop-nodemanager-slave1.out
[hadoop@master ~]$ jps
3467 ResourceManager
3324 SecondaryNameNode
3173 NameNode
3723 Jps
[hadoop@master ~]$

The master node shows that the above three processes are successfully started.

3467 ResourceManager
3324 SecondaryNameNode
3173 NameNode can see NodeManager when

executing jps on the slave node slave1 machine
, and DataNode indicates success.

[hadoop@slave1 ~]$ jps
2837 NodeManager
2771 DataNode
3187 Jps
[hadoop@slave1 ~]$

Execute jps on the slave node slave2 machine

[hadoop@slave2 ~]$ jps
2839 NodeManager
3221 Jps
2773 DataNode
[hadoop@slave2 ~]$

j How to solve various errors in hadoop installation.

Any issues can be resolved with the logs. The log file is in the default location. The default log level is info.
/opt/hadoop-2.6.0-cdh5.8.0/logs

So you can modify the log level: debug can get more detailed error information.

Modify HDFS to debug level:

For HDFS, you only need to modify sbin/hadoop-daemon.sh and replace INFO with DEBUG.

export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-"DEBUG,RFA"} 
export HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-"DEBUG,RFAS"} 
export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-"DEBUG,NullAppender"}

To configure YARN to print DEBUG information to its log file, modify its startup script sbin/yarn-daemon.sh, changing INFO to DEBUG:
export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-DEBUG,RFA} 

---------------------------------------------------------------------------
According to the configuration port in xml to access web management, the interface is as follows.





i hadoop history job (port related configuration is in mapred-site.xml)

to start history-server:

$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
Stop the history-server:

$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver

After the history-server starts, you can access its Web UI in a browser at slave2:19888.




3.httpfs

[hadoop@slave2 sbin]$ ./httpfs.sh start

Setting HTTPFS_HOME:          /opt/hadoop-2.6.0-cdh5.8.0
Setting HTTPFS_CONFIG:        /opt/hadoop-2.6.0-cdh5.8.0/etc/hadoop
Sourcing:                    /opt/hadoop-2.6.0-cdh5.8.0/etc/hadoop/httpfs-env.sh
Setting HTTPFS_LOG:           /opt/hadoop-2.6.0-cdh5.8.0/logs
Setting HTTPFS_TEMP:           /opt/hadoop-2.6.0-cdh5.8.0/temp
Setting HTTPFS_HTTP_PORT:     14000
Setting HTTPFS_ADMIN_PORT:     14001
Setting HTTPFS_HTTP_HOSTNAME: slave2
Setting HTTPFS_SSL_ENABLED: false
Setting HTTPFS_SSL_KEYSTORE_FILE:     /home/hadoop/.keystore
Setting HTTPFS_SSL_KEYSTORE_PASS:     password
Setting CATALINA_BASE:       /opt/hadoop-2.6.0-cdh5.8.0/share/hadoop/httpfs/tomcat
Setting HTTPFS_CATALINA_HOME:       /opt/hadoop-2.6.0-cdh5.8.0/share/hadoop/httpfs/tomcat
Setting CATALINA_OUT:        /opt/hadoop-2.6.0-cdh5.8.0/logs/httpfs-catalina.out
Setting CATALINA_PID:        /tmp/httpfs.pid

Using   CATALINA_OPTS:      
Adding to CATALINA_OPTS:     -Dhttpfs.home.dir=/opt/hadoop-2.6.0-cdh5.8.0 -Dhttpfs.config.dir=/opt/hadoop-2.6.0-cdh5.8.0/etc/hadoop -Dhttpfs.log.dir=/opt/hadoop-2.6.0-cdh5.8.0/logs -Dhttpfs.temp.dir=/opt/hadoop-2.6.0-cdh5.8.0/temp -Dhttpfs.admin.port=14001 -Dhttpfs.http.port=14000 -Dhttpfs.http.hostname=slave2
Using CATALINA_BASE:   /opt/hadoop-2.6.0-cdh5.8.0/share/hadoop/httpfs/tomcat
Using CATALINA_HOME:   /opt/hadoop-2.6.0-cdh5.8.0/share/hadoop/httpfs/tomcat
Using CATALINA_TMPDIR: /opt/hadoop-2.6.0-cdh5.8.0/share/hadoop/httpfs/tomcat/temp
Using JRE_HOME:        /usr/java/jdk1.7.0_79
Using CLASSPATH:       /opt/hadoop-2.6.0-cdh5.8.0/share/hadoop/httpfs/tomcat/bin/bootstrap.jar
Using CATALINA_PID:    /tmp/httpfs.pid
[hadoop@slave2 sbin]$ su -
Password:
[root@slave2 ~]# netstat -apn|grep 14000
tcp        0      0 :::14000                    :::*                        LISTEN      4013/java          
[root@slave2 ~]# netstat -apn|grep 14001
tcp        0      0 ::ffff:127.0.0.1:14001      :::*                        LISTEN      4013/java          
[root@slave2 ~]#







webhdfs










 
 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326403122&siteId=291194637