Big Data (Fully Distributed) Configuration Guide
Linux big data
Hadoop fully distributed
Fully distributed mode
Hadoop's greatest strength is distributed cluster computing, which is why production environments run the last of its three modes: fully distributed mode. The work breaks down into five stages:
Technical preparation
System planning
Build
Test
Go live
System planning
| Host | Role | Software |
| --- | --- | --- |
| 192.168.6.10 (nn01, master) | NameNode, SecondaryNameNode, ResourceManager | HDFS, YARN |
| 192.168.6.11 (node1) | DataNode, NodeManager | HDFS, YARN |
| 192.168.6.12 (node2) | DataNode, NodeManager | HDFS, YARN |
| 192.168.6.13 (node3) | DataNode, NodeManager | HDFS, YARN |
On the virtualization host (shown here as kvmhost), clone four virtual machines from the demo template:
[root@kvmhost ~]# cd /etc/libvirt/qemu/
[root@kvmhost qemu]# sed 's/demo/node10/' demo.xml > /etc/libvirt/qemu/node10.xml
[root@kvmhost qemu]# sed 's/demo/node11/' demo.xml > /etc/libvirt/qemu/node11.xml
[root@kvmhost qemu]# sed 's/demo/node12/' demo.xml > /etc/libvirt/qemu/node12.xml
[root@kvmhost qemu]# sed 's/demo/node13/' demo.xml > /etc/libvirt/qemu/node13.xml
[root@kvmhost qemu]# cd /var/lib/libvirt/images/
[root@kvmhost images]# qemu-img create -b node.qcow2 -f qcow2 node10.img 20G
[root@kvmhost images]# qemu-img create -b node.qcow2 -f qcow2 node11.img 20G
[root@kvmhost images]# qemu-img create -b node.qcow2 -f qcow2 node12.img 20G
[root@kvmhost images]# qemu-img create -b node.qcow2 -f qcow2 node13.img 20G
[root@kvmhost images]# cd /etc/libvirt/qemu/
[root@kvmhost qemu]# virsh define node10.xml
[root@kvmhost qemu]# virsh define node11.xml
[root@kvmhost qemu]# virsh define node12.xml
[root@kvmhost qemu]# virsh define node13.xml
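The four clones differ only by name, so the three steps above can be scripted. A minimal sketch, assuming the same demo.xml template and node.qcow2 backing image used here:
#!/bin/bash
# clone node10..node13 from the demo template in one pass
cd /var/lib/libvirt/images
for n in node1{0..3}; do
    sed "s/demo/${n}/" /etc/libvirt/qemu/demo.xml > /etc/libvirt/qemu/${n}.xml
    qemu-img create -b node.qcow2 -f qcow2 ${n}.img 20G   # 20G qcow2 overlay on node.qcow2
    virsh define /etc/libvirt/qemu/${n}.xml
done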
Boot each clone in turn and give it a static IP and a hostname. On the first VM (192.168.6.10, the master):
[root@localhost ~]# vim /etc/sysconfig/network-scripts/ifcfg-eth0
DEVICE=eth0
ONBOOT=yes
IPV6INIT=no
BOOTPROTO=static
TYPE=Ethernet
IPADDR="192.168.6.10"
NETMASK="255.255.255.0"
GATEWAY="192.168.6.254"
[root@localhost ~]# hostnamectl set-hostname nn01
[root@localhost ~]# halt -p
After powering it back on, confirm the address took effect:
[root@nn01 ~]# ifconfig | head -3
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.6.10 netmask 255.255.255.0 broadcast 192.168.6.255
The same procedure on the second VM (192.168.6.11):
[root@localhost ~]# vim /etc/sysconfig/network-scripts/ifcfg-eth0
DEVICE=eth0
ONBOOT=yes
IPV6INIT=no
BOOTPROTO=static
TYPE=Ethernet
IPADDR="192.168.6.11"
NETMASK="255.255.255.0"
GATEWAY="192.168.6.254"
[root@localhost ~]# hostnamectl set-hostname node1
[root@localhost ~]# halt -p
[root@node1 ~]# ifconfig | head -3
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.6.11 netmask 255.255.255.0 broadcast 192.168.6.255
On the third VM (192.168.6.12):
[root@localhost ~]# vim /etc/sysconfig/network-scripts/ifcfg-eth0
DEVICE=eth0
ONBOOT=yes
IPV6INIT=no
BOOTPROTO=static
TYPE=Ethernet
IPADDR="192.168.6.12"
NETMASK="255.255.255.0"
GATEWAY="192.168.6.254"
[root@localhost ~]# hostnamectl set-hostname node2
[root@localhost ~]# halt -p
And on the fourth (192.168.6.13):
[root@localhost ~]# vim /etc/sysconfig/network-scripts/ifcfg-eth0
DEVICE=eth0
ONBOOT=yes
IPV6INIT=no
BOOTPROTO=static
TYPE=Ethernet
IPADDR="192.168.6.13"
NETMASK="255.255.255.0"
GATEWAY="192.168.6.254"
[root@localhost ~]# hostnamectl set-hostname node3
[root@localhost ~]# halt -p
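From the virtualization host, it is worth confirming that all four VMs came back up on the right addresses; a quick loop:
[root@kvmhost ~]# for ip in 192.168.6.{10..13}; do ping -c1 -W1 ${ip} >/dev/null && echo "${ip} up"; done
192.168.6.10 up
192.168.6.11 up
192.168.6.12 up
192.168.6.13 up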
Every machine needs the same /etc/hosts, so that the names Hadoop is configured with resolve everywhere:
[root@nn01 ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.6.10 nn01 # namenode, secondarynamenode
192.168.6.11 node1 # datanode
192.168.6.12 node2 # datanode
192.168.6.13 node3 # datanode
Every machine also needs a working yum repository:
[root@nn01 ~]# cat /etc/yum.repos.d/local.repo
[local_source]
name=CentOS Source
baseurl=ftp://192.168.6.254/centos7
enabled=1
gpgcheck=1
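Both files have to be identical on all four machines. A minimal sketch for pushing them out from nn01, assuming the SSH trust set up later in this article (until then, each scp asks for the node's root password):
for i in node{1..3}; do
    scp /etc/hosts ${i}:/etc/hosts
    scp /etc/yum.repos.d/local.repo ${i}:/etc/yum.repos.d/local.repo
done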
All of the Hadoop configuration discussed below lives in one directory:
[root@nn01 hadoop]# pwd
/usr/local/hadoop/etc/hadoop
start-dfs.sh drives every worker over SSH, so turn off the interactive host-key prompt on nn01:
[root@nn01 ~]# vim /etc/ssh/ssh_config
Host *
GSSAPIAuthentication yes
StrictHostKeyChecking no
A quick sanity test of the example jar (here still in local mode, with oo as the input and xx as the output directory):
[root@nn01 hadoop]# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar wordcount oo xx
[root@nn01 hadoop]# file share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar
share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar: Zip archive data, at least v1.0 to extract
Running the jar without arguments lists the available example programs; naming a program without arguments prints its usage:
[root@nn01 hadoop]# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar
[root@nn01 hadoop]# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar wordcount
Usage: wordcount <in> [<in>...] <out>
Re-running with the same output directory fails, because MapReduce refuses to overwrite existing output:
Output directory file:/usr/local/hadoop/xx already exists
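The full local-mode cycle is: create an input directory, run the example, read the result. A minimal sketch, assuming the bundled text files as sample input and a fresh xx1 output directory:
[root@nn01 hadoop]# mkdir -p oo
[root@nn01 hadoop]# cp *.txt oo/     # LICENSE.txt, NOTICE.txt, README.txt as input
[root@nn01 hadoop]# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar wordcount oo xx1
[root@nn01 hadoop]# head xx1/part-r-00000   # one "word<TAB>count" line per word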
core-site.xml tells every daemon which filesystem to use (fs.defaultFS) and where to keep its working data (hadoop.tmp.dir):
[root@nn01 hadoop]# cat core-site.xml
<configuration>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://nn01:9000</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/var/hadoop</value>
</property>
</configuration>
hdfs-site.xml sets the web addresses of the NameNode and SecondaryNameNode and the block replication factor (2 copies across the 3 datanodes):
[root@nn01 hadoop]# cat hdfs-site.xml
<configuration>
<property>
    <name>dfs.namenode.http-address</name>
    <value>nn01:50070</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>nn01:50090</value>
</property>
<property>
    <name>dfs.replication</name>
    <value>2</value>
</property>
</configuration>
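The effective values can be read back with the getconf tool, which is a quick way to catch typos; a minimal check (output shown assuming the files above):
[root@nn01 hadoop]# /usr/local/hadoop/bin/hdfs getconf -confKey fs.defaultFS
hdfs://nn01:9000
[root@nn01 hadoop]# /usr/local/hadoop/bin/hdfs getconf -confKey dfs.replication
2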
Only two lines in hadoop-env.sh need changing (the rest of the file is omitted here):
[root@nn01 hadoop]# cat hadoop-env.sh
export JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.131-11.b12.el7.x86_64/jre/"
export HADOOP_CONF_DIR="/usr/local/hadoop/etc/hadoop"
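The OpenJDK build string in JAVA_HOME varies from install to install, so check that the hard-coded path really exists before moving on:
[root@nn01 hadoop]# ls -d /usr/lib/jvm/java-1.8.0-openjdk-*/jre/
/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.131-11.b12.el7.x86_64/jre/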
The slaves file lists the machines on which start-dfs.sh will start a DataNode:
[root@nn01 hadoop]# cat slaves
node1
node2
node3
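Every name in slaves must resolve on nn01; a quick check against /etc/hosts:
[root@nn01 hadoop]# for i in $(cat slaves); do getent hosts ${i}; done
192.168.6.11    node1
192.168.6.12    node2
192.168.6.13    node3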
Install the JDK and rsync on all four machines (shown here on nn01; run the same two commands on node1, node2 and node3):
[root@nn01 ~]# yum -y install java-1.8.0-openjdk-devel
[root@nn01 ~]# yum -y install rsync-3.0.9-18.el7.x86_64
Copy the Hadoop tarball from the physical host to nn01 and unpack it:
[root@kvmhost ~]# scp -r '/root/桌面/hadoop-2.7.6.tar.gz' 192.168.6.10:/root
[root@nn01 ~]# tar zxf hadoop-2.7.6.tar.gz
[root@nn01 ~]# mv hadoop-2.7.6 /usr/local/hadoop
Generate a password-less SSH key on nn01 and push the public key to every machine, nn01 included (start-dfs.sh also SSHes to the local host):
[root@nn01 ~]# ssh-keygen -b 2048 -t rsa -N '' -f key
[root@nn01 ~]# ssh-copy-id -i ./key.pub root@nn01   # repeat for node1, node2, node3
[root@nn01 ~]# mv key /root/.ssh/id_rsa
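Before going further, prove that no password prompt is left anywhere; start-dfs.sh will hang or fail on any node it cannot reach non-interactively:
[root@nn01 ~]# for i in nn01 node{1..3}; do ssh ${i} hostname; done
nn01
node1
node2
node3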
With rsync present everywhere, push the configured Hadoop tree from nn01 to the datanodes:
[root@nn01 hadoop]# for i in node{1..3}; do
> rsync -aSH --delete /usr/local/hadoop ${i}:/usr/local/ -e 'ssh' &
> done
Spot-check a datanode: the whole tree (including the oo/xx test directories) should have arrived:
[root@node1 ~]# ls /usr/local/hadoop
bin etc include lib libexec LICENSE.txt NOTICE.txt oo README.txt sbin share xx xx1
Re-syncing by hand after every configuration change gets tedious, so wrap the loop in a small script, saved as rrr in the Hadoop bin directory:
[root@nn01 bin]# cat rrr
#!/bin/bash
# push /usr/local/hadoop to every host named on the command line, in parallel
for i in "$@"; do
    rsync -aSH --delete /usr/local/hadoop ${i}:/usr/local/ -e 'ssh' &
done
wait
[root@nn01 bin]# chmod 755 rrr
[root@nn01 bin]# ls
container-executor hdfs mapred.cmd test-container-executor
hadoop hdfs.cmd rcc yarn
hadoop.cmd mapred rrr yarn.cmd
[root@nn01 bin]# ./rrr node{1..3}
Format the NameNode (on nn01 only, and only once), start HDFS, and check the cluster:
[root@nn01 hadoop]# ./bin/hdfs namenode -format
[root@nn01 hadoop]# ./sbin/start-dfs.sh
[root@nn01 hadoop]# ssh node1 jps
[root@nn01 hadoop]# ./bin/hdfs dfsadmin -report
Live datanodes (3):
Note: without the hadoop.tmp.dir setting in core-site.xml, Hadoop would default to /tmp/hadoop-${user.name}, which does not survive a cleanup of /tmp.
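Finally, each role can be confirmed process by process; jps ships with the JDK installed earlier:
[root@nn01 hadoop]# for i in nn01 node{1..3}; do echo "== ${i} =="; ssh ${i} jps; done
nn01 should show NameNode and SecondaryNameNode; each of node1, node2 and node3 should show a DataNode.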