Hadoop 2.7.2 Installation on Ubuntu 14.04 (Single-Node Cluster):
http://www.bogotobogo.com/Hadoop/BigData_hadoop_Install_on_ubuntu_single_node_cluster.php
Installing Hadoop on Ubuntu 14.04 (Multi-Node Cluster)
Here is a quick how-to for editing your /etc/hosts file.
The $ prefix represents a command you enter in a terminal.
Open /etc/hosts with your favorite text editor; remember to use sudo.
hduser@rkmishralinux:~$ sudo gedit /etc/hosts
Master Node
Master Node: rkmishralinux (172.16.22.22)
127.0.0.1 localhost
#127.0.1.1 rkmishralinux
172.16.22.22 rkmishralinux
172.16.22.171 hduserCP1
172.16.22.98 hduserCP2
172.16.22.97 hduserCP3
# The following lines are desirable for IPv6 capable hosts
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
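To sanity-check these entries before going further, you can ping each host by name (a quick verification of my own, not part of the original steps):
$ ping -c 1 rkmishralinux
$ ping -c 1 hduserCP1
$ ping -c 1 hduserCP2
$ ping -c 1 hduserCP3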
rkmishra@rkmishralinux:~$ sudo adduser hduser
$ su - hduser
$ ssh-keygen -t rsa -P ""
# Authorize the key to enable passwordless ssh to localhost
$ cat /home/hduser/.ssh/id_rsa.pub >> /home/hduser/.ssh/authorized_keys
$ chmod 600 /home/hduser/.ssh/authorized_keys
# Copy this key to slave-1 to enable passwordless ssh
$ ssh-copy-id -i ~/.ssh/id_rsa.pub hduserCP1
# Make sure you can do a passwordless ssh using the following command.
$ ssh hduserCP1
$ ssh-copy-id -i ~/.ssh/id_rsa.pub hduserCP2
$ ssh-copy-id -i ~/.ssh/id_rsa.pub hduserCP3
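As a final check (my own sketch, not from the original write-up), loop over all the slave hostnames and confirm each login works without a password prompt:
$ for h in hduserCP1 hduserCP2 hduserCP3; do ssh "$h" hostname; done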
Copy the Hadoop 2.7.2 distribution to /usr/local/hadoop.
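One way to do this, assuming you are downloading the stock Apache tarball (the mirror URL here is an example):
$ wget http://archive.apache.org/dist/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
$ tar -xzf hadoop-2.7.2.tar.gz
$ sudo mv hadoop-2.7.2 /usr/local/hadoop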
---------- $ sudo chown -R hduser:hduser /usr/local/hadoop ----------
----------- $gedit ~/.bashrc ----------
# Set HADOOP_HOME
export HADOOP_HOME=/usr/local/hadoop
# Set JAVA_HOME
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
# Add Hadoop bin and sbin directory to PATH
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
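Reload the shell configuration and confirm the hadoop binary is on the PATH (an extra verification step of my own):
$ source ~/.bashrc
$ hadoop version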
----------- $gedit /usr/local/hadoop/etc/hadoop/hadoop-env.sh ---------
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
------------- $ gedit /usr/local/hadoop/etc/hadoop/core-site.xml ----------
(Place the following properties inside the <configuration> ... </configuration> element; the same applies to the other XML config files below.)
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hduser/tmp</value>
<description>Temporary Directory.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://rkmishralinux:54310</value>
<description>Use HDFS as file storage engine</description>
</property>
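The hadoop.tmp.dir path above must exist and be writable by hduser; as a quick sketch, create it and confirm the NameNode URI is picked up from the config:
$ mkdir -p /home/hduser/tmp
$ hdfs getconf -confKey fs.defaultFS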
------------ $gedit /usr/local/hadoop/etc/hadoop/mapred-site.xml -------------
<property>
<name>mapreduce.jobtracker.address</name>
<value>rkmishralinux:54311</value>
<description>The host and port that the MapReduce job tracker runs
at. If “local”, then jobs are run in-process as a single map
and reduce task.
</description>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>The framework for running mapreduce jobs</description>
</property>
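Note: the Hadoop 2.7.2 distribution ships only mapred-site.xml.template, so if mapred-site.xml does not exist yet, create it from the template before editing:
$ cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml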
---------------------- $gedit /usr/local/hadoop/etc/hadoop/hdfs-site.xml -------------------
<property>
<name>dfs.replication</name>
<value>3</value>
<description>Default block replication.
The actual number of replications can be specified when the file is created.
The default is used if replication is not specified in create time.
</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/hadoop-data/hduser/hdfs/namenode</value>
<description>Determines where on the local filesystem the DFS name node should store the name table(fsimage). If this is a comma-delimited list of directories then the name table is replicated in all of the directories, for redundancy.
</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/hadoop-data/hduser/hdfs/datanode</value>
<description>Determines where on the local filesystem an DFS data node should store its blocks. If this is a comma-delimited list of directories, then data will be stored in all named directories, typically on different devices. Directories that do not exist are ignored.
</description>
</property>
--- $ sudo mkdir -p /hadoop-data/hduser/hdfs/namenode ---
--- $ sudo mkdir -p /hadoop-data/hduser/hdfs/datanode ---
--- $ sudo chown -R hduser:hduser /hadoop-data ----
------------------ $gedit /usr/local/hadoop/etc/hadoop/yarn-site.xml ------------
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>rkmishralinux:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>rkmishralinux:8032</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>rkmishralinux:8088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>rkmishralinux:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>rkmishralinux:8033</value>
</property>
------------------------- $gedit /usr/local/hadoop/etc/hadoop/slaves -----------
rkmishralinux
hduserCP1
hduserCP2
hduserCP3
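Instead of re-editing each file by hand on every slave (as the next section does), one option — my own sketch, assuming the same /usr/local/hadoop layout on each node — is to push the configuration from the master:
$ for h in hduserCP1 hduserCP2 hduserCP3; do scp /usr/local/hadoop/etc/hadoop/*.xml /usr/local/hadoop/etc/hadoop/hadoop-env.sh "$h":/usr/local/hadoop/etc/hadoop/; done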
=================================================================
Slave Node
Slave Node: hduserCP1 (172.16.22.171)
127.0.0.1 localhost
#127.0.1.1 rkmishralinux
172.16.22.22 rkmishralinux
172.16.22.171 hduserCP1
172.16.22.98 hduserCP2
172.16.22.97 hduserCP3
# The following lines are desirable for IPv6 capable hosts
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
rkmishra@hduserCP1:~$ sudo adduser hduser
$ su - hduser
Copy the Hadoop 2.7.2 distribution to /usr/local/hadoop (same steps as on the master).
---------- $ sudo chown -R hduser:hduser /usr/local/hadoop ------------
----------- $ mkdir -p /home/hduser/tmp --------------------
----------- $gedit ~/.bashrc ----------
# Set HADOOP_HOME
export HADOOP_HOME=/usr/local/hadoop
# Set JAVA_HOME
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
# Add Hadoop bin and sbin directory to PATH
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
----------- $gedit /usr/local/hadoop/etc/hadoop/hadoop-env.sh ---------
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
------------- $gedit /usr/local/hadoop/etc/hadoop/core-site.xml ----------
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hduser/tmp</value>
<description>Temporary Directory.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://rkmishralinux:54310</value>
<description>Use HDFS as file storage engine</description>
</property>
---------------------- $gedit /usr/local/hadoop/etc/hadoop/hdfs-site.xml -------------------
<property>
<name>dfs.replication</name>
<value>3</value>
<description>Default block replication.
The actual number of replications can be specified when the file is created.
The default is used if replication is not specified in create time.
</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/hadoop-data/hduser/hdfs/namenode</value>
<description>Determines where on the local filesystem the DFS name node should store the name table(fsimage). If this is a comma-delimited list of directories then the name table is replicated in all of the directories, for redundancy.
</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/hadoop-data/hduser/hdfs/datanode</value>
<description>Determines where on the local filesystem an DFS data node should store its blocks. If this is a comma-delimited list of directories, then data will be stored in all named directories, typically on different devices. Directories that do not exist are ignored.
</description>
</property>
--- $ sudo mkdir -p /hadoop-data/hduser/hdfs/namenode ---
--- $ sudo mkdir -p /hadoop-data/hduser/hdfs/datanode ---
--- $ sudo chown -R hduser:hduser /hadoop-data ----
------------------ $gedit /usr/local/hadoop/etc/hadoop/yarn-site.xml ------------
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>rkmishralinux:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>rkmishralinux:8032</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>rkmishralinux:8088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>rkmishralinux:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>rkmishralinux:8033</value>
</property>
=================================================================
[Master Node]
# Format the NameNode (run this only once; re-running it wipes HDFS metadata)
$ hdfs namenode -format
[Master Node]
$ start-dfs.sh
hduser@rkmishralinux:~$ jps
12642 DataNode
12844 SecondaryNameNode
1500 Jps
12478 NameNode
[Slave Node]
hduser@hduserCP1:~$ jps
22682 Jps
4975 DataNode
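To confirm that all DataNodes have registered with the NameNode, an extra check (run on the master) is:
$ hdfs dfsadmin -report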
[Master Node]
$ start-yarn.sh
hduser@rkmishralinux:~$ jps
12642 DataNode
13045 ResourceManager
13366 NodeManager
12844 SecondaryNameNode
1500 Jps
12478 NameNode
[Slave Node]
hduser@hduserCP1:~$ jps
5136 NodeManager
22682 Jps
4975 DataNode
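Similarly, you can list the NodeManagers that have registered with the ResourceManager (run on the master):
$ yarn node -list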
http://rkmishralinux:8088/cluster/nodes
$ hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar pi 30 100
http://rkmishralinux:8088/cluster/apps
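As a further smoke test (my own example, with hypothetical file and directory names), you can run the bundled wordcount job end to end:
$ echo "hello hadoop hello cluster" > /tmp/words.txt
$ hdfs dfs -mkdir -p /user/hduser/input
$ hdfs dfs -put /tmp/words.txt /user/hduser/input/
$ hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /user/hduser/input /user/hduser/output
$ hdfs dfs -cat /user/hduser/output/part-r-00000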