1 Configure time synchronization:
command: crontab -e
edit: 0 1 * * * /usr/sbin/ntpdate cn.pool.ntp.org  (note: entries edited via crontab -e take no user field; in vi, press a to enter insert mode, Esc to leave it, :wq saves and quits)
2 Configuration Hostname: vi /etc/sysconfig/network
3 Configure network:
Command: setup --> Network configuration --> eth0 --> toggle "Use DHCP" with the space bar --> enter IP and netmask
--> OK
/sbin/service network restart #Restart Network service
/sbin/ifconfig #Check network ip configuration
4 Configure cluster hosts list
vi /etc/hosts
192.168.1.100 master
192.168.1.101 node1
===================== ============
5 ssh keyless login
Generate public and private keys
ssh-keygen -q -t rsa -N "" -f /home/hadoop/.ssh/id_rsa
cd /home/hadoop/.ssh
cp id_rsa.pub authorized_keys
chmod go-wx authorized_keys
6 Install JDK
# Download the JDK
wget http://60.28.110.228/source/package/jdk-6u21-linux-i586-rpm.bin
# Install the JDK
chmod +x jdk-6u21-linux-i586-rpm.bin
./jdk-6u21-linux-i586-rpm.bin
# Configure environment variables: copy and paste the content below into vi
vi /etc/profile.d/java.sh
export JAVA_HOME=/usr/java/jdk1.6.0_21/
export HADOOP_HOME=/opt/modules/hadoop/hadoop-1.0.3/
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH
# Apply immediately in the current shell
source /etc/profile
7 Create hadoop user and environment
# Create the hadoop group
/usr/sbin/groupadd hadoop
# Create the hadoop user and assign it to the hadoop group
/usr/sbin/useradd hadoop -g hadoop
# Create the hadoop code directory structure
mkdir -p /opt/modules/hadoop/
cp hadoop-1.0.3.tar.gz /opt/modules/hadoop/
# Create the hadoop data directory structure
mkdir -p /opt/data/hadoop/
# Change ownership of the directory structures to the hadoop user
chown -R hadoop:hadoop /opt/modules/hadoop/
chown -R hadoop:hadoop /opt/data/hadoop/
# Unpack and test
cd /opt/modules/hadoop/
tar -xzvf hadoop-1.0.3.tar.gz
hadoop
8 hadoop stand-alone cluster configuration
vi /opt/modules/hadoop/hadoop-1.0.3/conf/hadoop-env.sh
Modify export HADOOP_HEAPSIZE=32
#Hadoop Common component configuration core-site.xml
vi /opt/modules/hadoop/hadoop-1.0.3/conf/core-site.xml
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
<!--hadoop namenode server address and port, in domain name form -->
</property>
<property>
<name>fs.checkpoint.dir</name>
<value>/data/hadoop/hdfs/namesecondary</value>
<!-- Secondary NameNode data storage path; multiple directories may be configured, separated by commas. -->
</property>
<property>
<name>fs.checkpoint.period</name>
<value>1800</value>
<!-- editlog triggers a merge every 30 minutes -->
</property>
<property>
<name>fs.checkpoint.size</name>
<value>33554432</value>
<!-- trigger a merge when editlog reaches 32m -->
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
<!-- Configure Hadoop compression package -->
</property>
<property>
<name>fs.trash.interval</name>
<value>1440</value>
<description>Hadoop trash: deleted files are kept for this many minutes before automatic removal; 1440 = 1 day.</description>
</property>
</configuration>
#HDFS NameNode, DataNode build configuration hdfs-site.xml
vi /opt/modules/hadoop/hadoop-1.0.3/conf/hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.name.dir</name>
<value>/data/hadoop/hdfs/name</value>
<!-- HDFS namenode image file save path -->
<description> </description>
</property>
<property>
<name>dfs.data.dir</name>
<value>/data/hadoop/hdfs/data</value>
<!-- HDFS data file storage path; may be configured across multiple partitions and disks, separated by commas -->
<description> </description>
</property>
<property>
<name>dfs.http.address</name>
<value>master:50070</value>
<!---HDFS Web view host and port -->
</property>
<property>
<name>dfs.secondary.http.address</name>
<value>node1:50090</value>
<!--Secondary HDFS web view host and port-->
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<!--HDFS data storage copies, usually 3-->
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value>1073741824</value>
<!-- The datanode reserves 1 GB of disk space for other programs instead of filling the disk; unit: bytes -->
</property>
<property>
<name>dfs.block.size</name>
<value>134217728</value>
<!--HDFS data block size, currently set to 128M/Block-->
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
<!-- HDFS close file permissions -->
</property>
</configuration>
#Configure MapReduce - JobTracker TaskTracker startup configuration
vi /opt/modules/hadoop/hadoop-1.0.3/conf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>master:9001</value>
<!-- JobTracker RPC host and port -->
</property>
<property>
<name>mapred.local.dir</name>
<value>/data/hadoop/mapred/mrlocal</value>
<!-- Intermediate files produced by MapReduce; may be configured across multiple disks -->
<final>true</final>
</property>
<property>
<name>mapred.system.dir</name>
<value>/data/hadoop/mapred/mrsystem</value>
<final>true</final>
<!-- MapReduce's system control file-->
</property>
<property>
<name>mapred.tasktracker.map.tasks.maximum</name>
<value>2</value>
<final>true</final>
<!-- The maximum number of map slots, the default is 3 -->
</property>
<property>
<name>mapred.tasktracker.reduce.tasks.maximum</name>
<value>1</value>
<final>true</final>
<!-- The maximum number of reduce slots in a single machine-->
</property>
<property>
<name>io.sort.mb</name>
<value>32</value>
<final>true</final>
<!-- Memory (MB) used for sorting map output; the default is 100; keep it smaller than mapred.child.java.opts -->
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx64M</value>
<!-- map and reduce process JVM maximum memory configuration -->
</property>
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
<!-- Map and reduce output intermediate files are compressed by default -->
</property>
</configuration>
# Hadoop stand-alone system: start-up and troubleshooting
# As root, create the Hadoop mapred and hdfs namenode/datanode base directory
mkdir -p /data/hadoop/
chown -R hadoop:hadoop /data/*
# Switch to the hadoop user
su hadoop
# Create the mapreduce and hdfs directories
mkdir -p /data/hadoop/mapred/mrlocal
mkdir -p /data/hadoop/mapred/mrsystem
mkdir -p /data/hadoop/hdfs/name
mkdir -p /data/hadoop/hdfs/data
mkdir -p /data/hadoop/hdfs/namesecondary
# As root, configure permissions
sudo chmod -R a+w /opt/modules/hadoop/
# Switch back to the hadoop user
su hadoop
# Format the file system
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop namenode -format
# Start the master node (NameNode):
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start namenode
# Start the JobTracker:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start jobtracker
# Start the SecondaryNameNode:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start secondarynamenode
# Start the DataNode && TaskTracker:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start datanode
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start tasktracker
# To stop, use the same commands with start replaced by stop.
# If an error occurs, check the logs:
tail -f /opt/modules/hadoop/hadoop-1.0.3/logs/*
# The jps command (shipped with the JDK) lists running Java processes.
Note: chmod go-w /data/hadoop/hdfs/data
hadoop_environment configuration
Guess you like
Origin http://43.154.161.224:23101/article/api/json?id=326445640&siteId=291194637
Recommended
Ranking