Hadoop environment configuration

1 Configure time synchronization:
    command: crontab -e
    edit: 0 1 * * * /usr/sbin/ntpdate cn.pool.ntp.org
    [in vi, press a to enter insert mode, Esc to leave it, then :wq to save and quit; a "root" user column is only needed when editing /etc/crontab directly, not with crontab -e]
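    Optionally confirm the NTP server is reachable before relying on the cron job:
    /usr/sbin/ntpdate -q cn.pool.ntp.org   # query only, does not set the clock
    date                                   # sanity-check the current system time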
2 Configure hostname: vi /etc/sysconfig/network
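    A minimal sketch of /etc/sysconfig/network on the master (the hostname matches the hosts list in step 4; use node1 on the slave):
    NETWORKING=yes
    HOSTNAME=master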
3 Configure the network:
     Command: setup --> Network configuration --> eth0 --> press Space to clear "Use DHCP" --> enter IP and netmask
                     --> OK
      /sbin/service network restart    # restart the network service
      /sbin/ifconfig                   # check the resulting IP configuration
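      Alternatively, edit the interface file directly; a sketch of a static configuration for the master
      (192.168.1.100 comes from the hosts list in step 4, the 255.255.255.0 netmask is an assumption):
      # /etc/sysconfig/network-scripts/ifcfg-eth0
      DEVICE=eth0
      ONBOOT=yes
      BOOTPROTO=static
      IPADDR=192.168.1.100
      NETMASK=255.255.255.0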
4 Configure cluster hosts list
  vi /etc/hosts
   192.168.1.100 master
   192.168.1.101 node1
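   A quick check that both names resolve:
   ping -c 3 master
   ping -c 3 node1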
5 SSH passwordless login
# Generate the public/private key pair for the hadoop user (created in step 7 below)
ssh-keygen -q -t rsa -N "" -f /home/hadoop/.ssh/id_rsa
cd /home/hadoop/.ssh
cp id_rsa.pub authorized_keys
chmod go-wx authorized_keys
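To extend passwordless login to node1, copy the public key there too (a sketch, assuming the hadoop user also exists on node1 and password login is allowed for the first copy):
ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub hadoop@node1
ssh hadoop@node1 hostname    # should print node1 without asking for a password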


6 Install the JDK
# Download the JDK
wget http://60.28.110.228/source/package/jdk-6u21-linux-i586-rpm.bin
# Install the JDK
chmod +x jdk-6u21-linux-i586-rpm.bin
./jdk-6u21-linux-i586-rpm.bin
# Configure environment variables: copy and paste the following into vi
vi /etc/profile.d/java.sh
export JAVA_HOME=/usr/java/jdk1.6.0_21/
export HADOOP_HOME=/opt/modules/hadoop/hadoop-1.0.3/
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH

# Apply the changes to the current shell immediately
source /etc/profile
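Confirm the JDK and variables are visible (profile.d scripts are read by login shells, so log in again if the variables come back empty):
java -version                  # should report 1.6.0_21
echo $JAVA_HOME $HADOOP_HOME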

7 Create the hadoop user and environment
# Create the hadoop group
/usr/sbin/groupadd hadoop
# Create the hadoop user and assign it to the hadoop group
/usr/sbin/useradd hadoop -g hadoop
# Create the hadoop code directory structure
mkdir -p /opt/modules/hadoop/
cp hadoop-1.0.3.tar.gz /opt/modules/hadoop/
# Create the hadoop data directory structure
mkdir -p /opt/data/hadoop/
# Change ownership of the directory structure to hadoop
chown -R hadoop:hadoop /opt/modules/hadoop/
chown -R hadoop:hadoop /opt/data/hadoop/
# Unzip and test
cd /opt/modules/hadoop/
tar -xzvf hadoop-1.0.3.tar.gz
hadoop    # prints the hadoop usage message if PATH and the unpack are correct
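A quick check that the user and ownership are in place:
id hadoop                                      # shows the uid and the hadoop group
ls -ld /opt/modules/hadoop /opt/data/hadoop    # both should be owned by hadoop:hadoop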

8 Hadoop single-node cluster configuration
vi /opt/modules/hadoop/hadoop-1.0.3/conf/hadoop-env.sh
Modify: export HADOOP_HEAPSIZE=32

# Hadoop Common component configuration: core-site.xml
vi /opt/modules/hadoop/hadoop-1.0.3/conf/core-site.xml
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://master:9000</value>
    <!-- hadoop namenode server address and port, given as a host name -->
  </property>
  <property>
    <name>fs.checkpoint.dir</name>
    <value>/data/hadoop/hdfs/namesecondary</value>
    <!-- secondary namenode data storage path; multiple directories may be given, separated by commas -->
  </property>
  <property>
    <name>fs.checkpoint.period</name>
    <value>1800</value>
    <!-- trigger an editlog merge every 30 minutes -->
  </property>
  <property>
    <name>fs.checkpoint.size</name>
    <value>33554432</value>
    <!-- trigger a merge when the editlog reaches 32 MB -->
  </property>
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
    <!-- configure the Hadoop compression codecs -->
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>1440</value>
    <description>Hadoop trash retention time in minutes; 1440 minutes = 1 day.</description>
  </property>
</configuration>
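The checkpoint and trash values above are plain arithmetic, easy to double-check from the shell:
echo $((32 * 1024 * 1024))    # 33554432 bytes  -> fs.checkpoint.size, 32 MB
echo $((24 * 60))             # 1440 minutes    -> fs.trash.interval, 1 day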
# HDFS NameNode and DataNode configuration: hdfs-site.xml
vi /opt/modules/hadoop/hadoop-1.0.3/conf/hdfs-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
  <property>
    <name>dfs.name.dir</name>
    <value>/data/hadoop/hdfs/name</value>
    <!-- HDFS namenode image file storage path -->
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/data/hadoop/hdfs/data</value>
    <!-- HDFS data file storage path; multiple partitions/disks may be given, separated by commas -->
  </property>
  <property>
    <name>dfs.http.address</name>
    <value>master:50070</value>
    <!-- HDFS web UI host and port -->
  </property>
  <property>
    <name>dfs.secondary.http.address</name>
    <value>node1:50090</value>
    <!-- secondary namenode web UI host and port -->
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
    <!-- number of HDFS data replicas, usually 3 -->
  </property>
  <property>
    <name>dfs.datanode.du.reserved</name>
    <value>1073741824</value>
    <!-- the datanode reserves 1 GB of disk space for other programs instead of filling the disk, in bytes -->
  </property>
  <property>
    <name>dfs.block.size</name>
    <value>134217728</value>
    <!-- HDFS block size, here set to 128 MB per block -->
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
    <!-- disable HDFS file permission checks -->
  </property>
</configuration>
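Likewise for the byte values in hdfs-site.xml:
echo $((128 * 1024 * 1024))     # 134217728 bytes  -> dfs.block.size, 128 MB
echo $((1024 * 1024 * 1024))    # 1073741824 bytes -> dfs.datanode.du.reserved, 1 GB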
# Configure MapReduce - JobTracker and TaskTracker startup configuration: mapred-site.xml
vi /opt/modules/hadoop/hadoop-1.0.3/conf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
   
<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>master:9001</value>
    <!-- JobTracker RPC host and port -->
  </property>
  <property>
    <name>mapred.local.dir</name>
    <value>/data/hadoop/mapred/mrlocal</value>
    <!-- intermediate files produced by MapReduce; multiple directories may be given, one per disk -->
    <final>true</final>
  </property>
  <property>
    <name>mapred.system.dir</name>
    <value>/data/hadoop/mapred/mrsystem</value>
    <final>true</final>
    <!-- MapReduce system control files -->
  </property>
  <property>
    <name>mapred.tasktracker.map.tasks.maximum</name>
    <value>2</value>
    <final>true</final>
    <!-- maximum number of map slots on a single machine; the default is 3 -->
  </property>
  <property>
    <name>mapred.tasktracker.reduce.tasks.maximum</name>
    <value>1</value>
    <final>true</final>
    <!-- maximum number of reduce slots on a single machine -->
  </property>
  <property>
    <name>io.sort.mb</name>
    <value>32</value>
    <final>true</final>
    <!-- memory used for sorting map output, default 100 MB; keep it smaller than mapred.child.java.opts -->
  </property>
  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx64M</value>
    <!-- maximum JVM heap for map and reduce child processes -->
  </property>
  <property>
    <name>mapred.compress.map.output</name>
    <value>true</value>
    <!-- compress intermediate map output by default -->
  </property>
</configuration>
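Before starting any daemons it is worth confirming the three edited files are well-formed XML (a quick check; xmllint comes with libxml2 and may need to be installed):
xmllint --noout /opt/modules/hadoop/hadoop-1.0.3/conf/core-site.xml
xmllint --noout /opt/modules/hadoop/hadoop-1.0.3/conf/hdfs-site.xml
xmllint --noout /opt/modules/hadoop/hadoop-1.0.3/conf/mapred-site.xml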
# Hadoop single-node system: start it up and check for errors
# Under root, create the Hadoop mapred and HDFS namenode/datanode directories
mkdir -p /data/hadoop/
chown -R hadoop:hadoop /data/*
# Switch to the hadoop user
su hadoop
# Create the MapReduce directories
mkdir -p /data/hadoop/mapred/mrlocal
mkdir -p /data/hadoop/mapred/mrsystem
# Create the HDFS directories
mkdir -p /data/hadoop/hdfs/name
mkdir -p /data/hadoop/hdfs/data
mkdir -p /data/hadoop/hdfs/namesecondary

# Startup: under root, configure permissions, then switch back to the hadoop user
sudo chmod -R a+w /opt/modules/hadoop/
su hadoop
# Format the HDFS filesystem
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop namenode -format
# Start the NameNode (master node):
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start namenode
# Start the JobTracker:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start jobtracker
# Start the SecondaryNameNode:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start secondarynamenode
# Start the DataNode && TaskTracker:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start datanode
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start tasktracker
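Once all five daemons are up, the jps command mentioned below should list them by class name (the PIDs will differ on your machine):
jps
# expected processes: NameNode, JobTracker, SecondaryNameNode, DataNode, TaskTracker, plus Jps itself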

# To stop, the commands are the same with start replaced by stop
# If an error occurs, check the logs
tail -f /opt/modules/hadoop/hadoop-1.0.3/logs/*

# The jps command (part of the JDK) lists the Java processes that are running
Note: chmod go-w /data/hadoop/hdfs/data    # the DataNode refuses to start if its data directory is group- or world-writable
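A simple smoke test once everything is running (a sketch; the examples jar name assumes the stock hadoop-1.0.3 tarball):
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop fs -mkdir /test
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop fs -put /etc/hosts /test/
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop fs -ls /test
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop jar /opt/modules/hadoop/hadoop-1.0.3/hadoop-examples-1.0.3.jar pi 2 10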
