1 Configure time synchronization:
command: crontab -e
edit: 0 1 * * * /usr/sbin/ntpdate cn.pool.ntp.org  (note: entries edited via crontab -e take no user field; in vi, press a to enter insert mode, Esc to leave it, :wq saves and quits)
2 Configuration Hostname: vi /etc/sysconfig/network
3 Configure network:
Command: setup --> Network configuration --> eth0 --> toggle "Use DHCP" with the space bar --> enter IP and netmask
--> OK
/sbin/service network restart #Restart Network service
/sbin/ifconfig #Check network ip configuration
4 Configure cluster hosts list
vi /etc/hosts
192.168.1.100 master
192.168.1.101 node1
===================== ============
5 ssh keyless login
Generate public and private keys
ssh-keygen -q -t rsa -N "" -f /home/hadoop/.ssh/id_rsa
cd /home/hadoop/.ssh
cp id_rsa.pub authorized_keys
chmod go-wx authorized_keys
6 Install JDK
# Download the JDK
wget http://60.28.110.228/source/package/jdk-6u21-linux-i586-rpm.bin
# Install the JDK
chmod +x jdk-6u21-linux-i586-rpm.bin
./jdk-6u21-linux-i586-rpm.bin
# Configure environment variables: copy and paste the content below into vi
vi /etc/profile.d/java.sh
export JAVA_HOME=/usr/java/jdk1.6.0_21/
export HADOOP_HOME=/opt/modules/hadoop/hadoop-1.0.3/
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH
# Apply immediately in the current shell
source /etc/profile
7 Create hadoop user and environment
# Create the hadoop group
/usr/sbin/groupadd hadoop
# Create the hadoop user and assign it to the hadoop group
/usr/sbin/useradd hadoop -g hadoop
# Create the hadoop code directory structure
mkdir -p /opt/modules/hadoop/
cp hadoop-1.0.3.tar.gz /opt/modules/hadoop/
# Create the hadoop data directory structure
mkdir -p /opt/data/hadoop/
# Change ownership of the directory structures to the hadoop user
chown -R hadoop:hadoop /opt/modules/hadoop/
chown -R hadoop:hadoop /opt/data/hadoop/
# Unpack and test
cd /opt/modules/hadoop/
tar -xzvf hadoop-1.0.3.tar.gz
hadoop
8 hadoop stand-alone cluster configuration
vi /opt/modules/hadoop/hadoop-1.0.3/conf/hadoop-env.sh
Modify export HADOOP_HEAPSIZE=32
#Hadoop Common component configuration core-site.xml
vi /opt/modules/hadoop/hadoop-1.0.3/conf/core-site.xml
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
<!--hadoop namenode server address and port, in domain name form -->
</property>
<property>
<name>fs.checkpoint.dir</name>
<value>/data/hadoop/hdfs/namesecondary</value>
<!-- Secondary NameNode data storage path; multiple directories may be configured, separated by commas. -->
</property>
<property>
<name>fs.checkpoint.period</name>
<value>1800</value>
<!-- editlog triggers a merge every 30 minutes -->
</property>
<property>
<name>fs.checkpoint.size</name>
<value>33554432</value>
<!-- trigger a merge when editlog reaches 32m -->
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
<!-- Configure Hadoop compression package -->
</property>
<property>
<name>fs.trash.interval</name>
<value>1440</value>
<description>Hadoop trash: deleted files are kept for this many minutes before automatic removal; 1440 = 1 day.</description>
</property>
</configuration>
#HDFS NameNode, DataNode build configuration hdfs-site.xml
vi /opt/modules/hadoop/hadoop-1.0.3/conf/hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.name.dir</name>
<value>/data/hadoop/hdfs/name</value>
<!-- HDFS namenode image file save path -->
<description> </description>
</property>
<property>
<name>dfs.data.dir</name>
<value>/data/hadoop/hdfs/data</value>
<!-- HDFS data file storage path; may be configured across multiple partitions and disks, separated by commas -->
<description> </description>
</property>
<property>
<name>dfs.http.address</name>
<value>master:50070</value>
<!---HDFS Web view host and port -->
</property>
<property>
<name>dfs.secondary.http.address</name>
<value>node1:50090</value>
<!--Secondary HDFS web view host and port-->
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<!--HDFS data storage copies, usually 3-->
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value>1073741824</value>
<!-- The datanode reserves 1 GB of disk space for other programs instead of filling the disk; unit: bytes -->
</property>
<property>
<name>dfs.block.size</name>
<value>134217728</value>
<!--HDFS data block size, currently set to 128M/Block-->
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
<!-- HDFS close file permissions -->
</property>
</configuration>
#Configure MapReduce - JobTracker TaskTracker startup configuration
vi /opt/modules/hadoop/hadoop-1.0.3/conf/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>master:9001</value>
<!-- JobTracker RPC host and port -->
</property>
<property>
<name>mapred.local.dir</name>
<value>/data/hadoop/mapred/mrlocal</value>
<!-- Intermediate files produced by MapReduce; may be configured across multiple disks -->
<final>true</final>
</property>
<property>
<name>mapred.system.dir</name>
<value>/data/hadoop/mapred/mrsystem</value>
<final>true</final>
<!-- MapReduce's system control file-->
</property>
<property>
<name>mapred.tasktracker.map.tasks.maximum</name>
<value>2</value>
<final>true</final>
<!-- The maximum number of map slots, the default is 3 -->
</property>
<property>
<name>mapred.tasktracker.reduce.tasks.maximum</name>
<value>1</value>
<final>true</final>
<!-- The maximum number of reduce slots in a single machine-->
</property>
<property>
<name>io.sort.mb</name>
<value>32</value>
<final>true</final>
<!-- Memory (MB) used for sorting map output; the default is 100; keep it smaller than mapred.child.java.opts -->
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx64M</value>
<!-- map and reduce process JVM maximum memory configuration -->
</property>
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
<!-- Map and reduce output intermediate files are compressed by default -->
</property>
</configuration>
# Hadoop stand-alone system: start-up and troubleshooting
# As root, create the Hadoop mapred and hdfs namenode/datanode base directory
mkdir -p /data/hadoop/
chown -R hadoop:hadoop /data/*
# Switch to the hadoop user
su hadoop
# Create the mapreduce and hdfs directories
mkdir -p /data/hadoop/mapred/mrlocal
mkdir -p /data/hadoop/mapred/mrsystem
mkdir -p /data/hadoop/hdfs/name
mkdir -p /data/hadoop/hdfs/data
mkdir -p /data/hadoop/hdfs/namesecondary
# As root, configure permissions
sudo chmod -R a+w /opt/modules/hadoop/
# Switch back to the hadoop user
su hadoop
# Format the file system
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop namenode -format
# Start the master node (NameNode):
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start namenode
# Start the JobTracker:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start jobtracker
# Start the SecondaryNameNode:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start secondarynamenode
# Start the DataNode && TaskTracker:
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start datanode
/opt/modules/hadoop/hadoop-1.0.3/bin/hadoop-daemon.sh start tasktracker
# To stop, use the same commands with start replaced by stop.
# If an error occurs, check the logs:
tail -f /opt/modules/hadoop/hadoop-1.0.3/logs/*
# The jps command (shipped with the JDK) lists running Java processes.
Note: chmod go-w /data/hadoop/hdfs/data
hadoop_environment configuration
Guess you like
Origin http://43.154.161.224:23101/article/api/json?id=326445640&siteId=291194637
Recommended
Ranking