Hands-On: Building a Hadoop HA (High-Availability) Distributed Cluster

Hadoop is a distributed-systems foundation that bundles a distributed computing framework (MapReduce) with a distributed file system (HDFS). A high-availability (HA) cluster is the most robust deployment mode and the one most commonly used in production.

Lab environment:

  • Three CentOS 7 hosts
Hostname    IP address
master      10.30.59.130
slave1      10.30.59.131
slave2      10.30.59.132

Software requirements:

Software     Version
JDK          8u77
ZooKeeper    3.4.5
Hadoop       2.6.0
  • Conventions:
    • Installation packages live in /opt/soft
    • Components are installed under /opt

Prerequisites:

  • Hostname-to-IP mapping is in place on every host
  • Passwordless SSH login is configured between all hosts
  • ZooKeeper has been deployed and started
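
A quick sanity check of these prerequisites before proceeding (a minimal sketch; the ZooKeeper path below is an assumption, adjust it to your deployment):

[root@master ~]# ping -c 1 slave1                      # hostname mapping resolves
[root@master ~]# ssh slave1 hostname                   # prints "slave1" with no password prompt
[root@master ~]# /opt/zookeeper/bin/zkServer.sh status # assumed install path; expect one leader and two followers across the cluster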

Procedure:

1. Disable the firewall and SELinux

  • Run this on all three nodes
[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl disable firewalld
Removed symlink /etc/systemd/system/multi-user.target.wants/firewalld.service.
Removed symlink /etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service.
[root@master ~]# setenforce 0
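
Note that setenforce 0 only switches SELinux to permissive mode until the next reboot. To keep it off permanently, also update /etc/selinux/config (on all three nodes):

[root@master ~]# sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config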

2. Unpack the components

[root@master ~]# cd /opt 
[root@master opt]# tar -xzvf soft/jdk-8u77-linux-x64.tar.gz
[root@master opt]# tar -xzvf soft/hadoop-2.6.0.tar.gz
[root@master opt]# mv jdk1.8.0_77/ jdk
[root@master opt]# mv hadoop-2.6.0/ hadoop
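
A quick check that the JDK unpacked correctly (the version string should report 1.8.0_77):

[root@master opt]# jdk/bin/java -version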

3. Fill in the configuration files

[root@master opt]# vi hadoop/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="utf-8"?>
<configuration>
  <property>
    <!-- Logical name of the nameservice -->
    <name>dfs.nameservices</name>
    <value>ns1</value>
  </property>
  <property>
    <!-- Unique identifiers for each NameNode in the nameservice -->
    <name>dfs.ha.namenodes.ns1</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <!-- RPC address nn1 listens on -->
    <name>dfs.namenode.rpc-address.ns1.nn1</name>
    <value>master:9000</value>
  </property>
  <property>
    <!-- HTTP address nn1 listens on -->
    <name>dfs.namenode.http-address.ns1.nn1</name>
    <value>master:50070</value>
  </property>
  <property>
    <!-- RPC address nn2 listens on -->
    <name>dfs.namenode.rpc-address.ns1.nn2</name>
    <value>slave1:9000</value>
  </property>
  <property>
    <!-- HTTP address nn2 listens on -->
    <name>dfs.namenode.http-address.ns1.nn2</name>
    <value>slave1:50070</value>
  </property>
  <property>
    <!-- Shared edits storage; 8485 is the default JournalNode RPC port -->
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master:8485;slave1:8485;slave2:8485/ns1</value>
  </property>
  <property>
    <!-- Local path where each JournalNode stores its edits -->
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/hadoop-repo/journal</value>
  </property>
  <property>
    <!-- NameNode metadata paths; two paths are given for redundancy -->
    <name>dfs.namenode.name.dir</name>
    <value>file:///opt/hadoop-repo/name1,file:///opt/hadoop-repo/name2</value>
  </property>
  <property>
    <!-- DataNode data paths; two paths are given for redundancy -->
    <name>dfs.datanode.data.dir</name>
    <value>file:///opt/hadoop-repo/data1,file:///opt/hadoop-repo/data2</value>
  </property>
  <property>
    <!-- Enable automatic HA failover -->
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <!-- Java class HDFS clients use to locate the Active NameNode -->
    <name>dfs.client.failover.proxy.provider.ns1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <!-- Methods used to fence the old Active NN during failover;
         shell(/bin/true) is listed second so failover can still
         proceed if the old Active host is unreachable over SSH -->
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/bin/true)
    </value>
  </property>
  <property>
    <!-- SSH private key used by the sshfence method -->
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
</configuration>
[root@master opt]# vi hadoop/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="utf-8"?>
<configuration>
  <property>
    <!-- Default file system URI; points at the HA nameservice, not a single host -->
    <name>fs.defaultFS</name>
    <value>hdfs://ns1</value>
  </property>
  <property>
    <!-- Base directory for temporary files -->
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop-repo/tmp</value>
  </property>
  <property>
    <!-- ZooKeeper servers used by the ZKFailoverController during automatic
         failover; 2181 is ZooKeeper's default client port -->
    <name>ha.zookeeper.quorum</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
</configuration>
[root@master opt]# cp hadoop/etc/hadoop/mapred-site.xml.template hadoop/etc/hadoop/mapred-site.xml
[root@master opt]# vi hadoop/etc/hadoop/mapred-site.xml
<?xml version="1.0" encoding="utf-8"?>
<configuration>
  <property>
    <!-- Run MapReduce on YARN -->
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <!-- Staging directory used when submitting jobs -->
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/opt/hadoop-repo/history</value>
  </property>
</configuration>
[root@master opt]# vi hadoop/etc/hadoop/yarn-site.xml
<?xml version="1.0" encoding="utf-8"?>
<configuration>
  <property>
    <!-- Enable ResourceManager HA -->
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <!-- Identifier for this RM cluster -->
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yrc</value>
  </property>
  <property>
    <!-- Logical IDs of the ResourceManagers -->
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <!-- Host that runs rm1 -->
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>slave1</value>
  </property>
  <property>
    <!-- Host that runs rm2 -->
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>slave2</value>
  </property>
  <property>
    <!-- ZooKeeper quorum used for RM state and leader election;
         2181 is ZooKeeper's default client port -->
    <name>yarn.resourcemanager.zk-address</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
  <property>
    <!-- Auxiliary service the NodeManagers run (the MapReduce shuffle handler) -->
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <!-- Enable log aggregation -->
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
</configuration>
[root@master opt]# vi hadoop/etc/hadoop/slaves
master
slave1
slave2

4. Configure environment variables and apply them immediately

[root@master opt]# vi /etc/profile.d/hadoop-etc.sh
export JAVA_HOME=/opt/jdk
export PATH=$PATH:$JAVA_HOME/bin

export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

[root@master opt]# source /etc/profile.d/hadoop-etc.sh
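
With the variables loaded, hadoop and hdfs now resolve on the PATH, and you can confirm that the HA settings from step 3 are being read (hdfs getconf is a stock utility; ns1 and nn1,nn2 were defined in hdfs-site.xml above):

[root@master opt]# hadoop version
[root@master opt]# hdfs getconf -confKey dfs.nameservices      # expect: ns1
[root@master opt]# hdfs getconf -confKey dfs.ha.namenodes.ns1  # expect: nn1,nn2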

5. Sync the configuration to both slave nodes and apply their environment variables

[root@master opt]# scp -r hadoop slave1:/opt 
[root@master opt]# scp -r hadoop slave2:/opt
[root@master opt]# scp /etc/profile.d/hadoop-etc.sh slave1:/etc/profile.d
[root@master opt]# scp /etc/profile.d/hadoop-etc.sh slave2:/etc/profile.d
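
hadoop-etc.sh points JAVA_HOME at /opt/jdk, so that directory must exist on the slaves too. It often already does (ZooKeeper needs a JDK), but if not, copy it over as well:

[root@master opt]# scp -r jdk slave1:/opt
[root@master opt]# scp -r jdk slave2:/opt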
  • Apply the environment variables on both slave nodes
[root@slave1 ~]# source /etc/profile.d/hadoop-etc.sh
[root@slave2 ~]# source /etc/profile.d/hadoop-etc.sh

6. Initialize HDFS

  • Start a JournalNode on all three machines
[root@master opt]# hadoop-daemon.sh start journalnode
  • On master, format HDFS and the ZooKeeper failover state
[root@master opt]# hdfs namenode -format
[root@master opt]# hdfs zkfc -formatZK
[root@master opt]# hadoop-daemon.sh start namenode
  • On slave1, copy the metadata just formatted on master to the local node
[root@slave1 ~]# hdfs namenode -bootstrapStandby
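
-bootstrapStandby copies the namespace from the NameNode already running on master, which is why that NameNode had to be started first. If it succeeded, slave1 now mirrors the metadata (exact file names vary with transaction IDs, so this listing is only illustrative):

[root@slave1 ~]# ls /opt/hadoop-repo/name1/current
# expect a VERSION file plus fsimage files matching master's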

7. Start Hadoop

  • Start HDFS from master
[root@master opt]# start-dfs.sh 
  • Start YARN from slave1
[root@slave1 ~]# start-yarn.sh 
  • Start the second ResourceManager on slave2
[root@slave2 ~]# yarn-daemon.sh start resourcemanager 
  • Start the JobHistory server on all three nodes
[root@master opt]# mr-jobhistory-daemon.sh start historyserver 
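
Everything should now be up. Each HA pair can be queried with the stock admin tools (nn1/nn2 and rm1/rm2 are the IDs defined in the configuration; expect one active and one standby per pair):

[root@master opt]# hdfs haadmin -getServiceState nn1
[root@master opt]# hdfs haadmin -getServiceState nn2
[root@slave1 ~]# yarn rmadmin -getServiceState rm1
[root@slave1 ~]# yarn rmadmin -getServiceState rm2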

Verification:

[root@master opt]# jps
2786 NameNode
2387 DataNode
3203 Jps
1813 QuorumPeerMain
2662 DFSZKFailoverController
3161 JobHistoryServer
1978 JournalNode

[root@slave1 ~]# jps
3249 NodeManager
2403 DFSZKFailoverController
2292 DataNode
1926 JournalNode
1800 QuorumPeerMain
2201 NameNode
2940 JobHistoryServer
3724 Jps
3437 ResourceManager

[root@slave2 ~]# jps
1810 QuorumPeerMain
11673 NodeManager
11914 Jps
1931 JournalNode
11851 ResourceManager
2012 DataNode
11500 JobHistoryServer

[root@master opt]# hdfs dfsadmin -report
Configured Capacity: 93344772096 (86.93 GB)
Present Capacity: 86898683904 (80.93 GB)
DFS Remaining: 86898671616 (80.93 GB)
DFS Used: 12288 (12 KB)
DFS Used%: 0.00%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0

-------------------------------------------------
Live datanodes (3):

Name: 10.30.59.130:50010 (master)
Hostname: master
Decommission Status : Normal
Configured Capacity: 31114924032 (28.98 GB)
DFS Used: 4096 (4 KB)
Non DFS Used: 2409914368 (2.24 GB)
DFS Remaining: 28705005568 (26.73 GB)
DFS Used%: 0.00%
DFS Remaining%: 92.25%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Sat Jun 08 02:17:32 CST 2019


Name: 10.30.59.131:50010 (slave1)
Hostname: slave1
Decommission Status : Normal
Configured Capacity: 31114924032 (28.98 GB)
DFS Used: 4096 (4 KB)
Non DFS Used: 2017943552 (1.88 GB)
DFS Remaining: 29096976384 (27.10 GB)
DFS Used%: 0.00%
DFS Remaining%: 93.51%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Sat Jun 08 02:17:35 CST 2019


Name: 10.30.59.132:50010 (slave2)
Hostname: slave2
Decommission Status : Normal
Configured Capacity: 31114924032 (28.98 GB)
DFS Used: 4096 (4 KB)
Non DFS Used: 2018230272 (1.88 GB)
DFS Remaining: 29096689664 (27.10 GB)
DFS Used%: 0.00%
DFS Remaining%: 93.51%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Sat Jun 08 02:17:32 CST 2019


In addition, the following web pages should all display correctly.
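
Based on the addresses configured above (50070 comes from hdfs-site.xml; 8088 and 19888 are Hadoop's default ResourceManager and JobHistory web ports, which this setup does not override):

  • http://master:50070 (nn1 web UI)
  • http://slave1:50070 (nn2 web UI)
  • http://slave1:8088 (ResourceManager web UI)
  • http://master:19888 (JobHistory web UI)

As a final HA drill, you can kill the Active NameNode and confirm the Standby takes over (check which side is active first; the PID below is the NameNode PID from the jps output on master):

[root@master opt]# hdfs haadmin -getServiceState nn1   # suppose this reports active
[root@master opt]# kill -9 2786                        # NameNode PID from the jps output above
[root@slave1 ~]# hdfs haadmin -getServiceState nn2     # should now report active
[root@master opt]# hadoop-daemon.sh start namenode     # rejoin the old Active as the new standby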