Yarn HA 文档
配置 Yarn 分配集群资源
yarn-site.xml 配置
<!--
  yarn-site.xml fragment for a two-node ResourceManager HA setup (rm1 on
  worker1, rm2 on worker2) that uses ZooKeeper for state storage and
  automatic failover. Paste these <property> elements inside the
  <configuration> element of yarn-site.xml.
-->

<!-- Base Configs -->
<property>
  <description>Classpath for typical applications.</description>
  <name>yarn.application.classpath</name>
  <value>
    $HADOOP_CONF_DIR,
    $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
    $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
    $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
    $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
  </value>
</property>
<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
<!--
  NodeManager local and log directories. They are declared exactly once:
  a second declaration of the same key later in the file would override
  these values.
-->
<property>
  <name>yarn.nodemanager.local-dirs</name>
  <value>file:///data/1/yarn/local,file:///data/2/yarn/local,file:///data/3/yarn/local</value>
</property>
<property>
  <name>yarn.nodemanager.log-dirs</name>
  <value>file:///data/1/yarn/logs,file:///data/2/yarn/logs,file:///data/3/yarn/logs</value>
</property>
<property>
  <name>yarn.nodemanager.remote-app-log-dir</name>
  <value>/var/log/hadoop-yarn/apps</value>
</property>

<!-- HA Configs -->
<property>
  <name>yarn.resourcemanager.connect.retry-interval.ms</name>
  <value>2000</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>xiwu-yarn-rm-cluster</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>rm1,rm2</value>
</property>
<!-- Host-specific: set to rm1 on the first master and rm2 on the second. -->
<property>
  <name>yarn.resourcemanager.ha.id</name>
  <value>rm1</value>
</property>

<!-- Scheduler -->
<property>
  <name>yarn.resourcemanager.scheduler.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
  <name>yarn.scheduler.fair.allow-undeclared-pools</name>
  <value>true</value>
</property>
<property>
  <name>yarn.scheduler.fair.user-as-default-queue</name>
  <value>true</value>
</property>
<property>
  <name>yarn.scheduler.fair.preemption</name>
  <value>true</value>
</property>
<property>
  <name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
  <value>0.8</value>
</property>

<!-- ResourceManager state recovery backed by ZooKeeper -->
<property>
  <name>yarn.resourcemanager.recovery.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.store.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
  <name>yarn.resourcemanager.zk-address</name>
  <value>worker1:2181,worker2:2181,worker3:2181</value>
</property>
<property>
  <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
  <value>5000</value>
</property>

<!-- Service addresses for rm1 -->
<property>
  <name>yarn.resourcemanager.address.rm1</name>
  <value>worker1:23140</value>
</property>
<property>
  <name>yarn.resourcemanager.scheduler.address.rm1</name>
  <value>worker1:23130</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.https.address.rm1</name>
  <value>worker1:23189</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.address.rm1</name>
  <value>worker1:23188</value>
</property>
<property>
  <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
  <value>worker1:23125</value>
</property>
<property>
  <name>yarn.resourcemanager.admin.address.rm1</name>
  <value>worker1:23141</value>
</property>

<!-- Service addresses for rm2 -->
<property>
  <name>yarn.resourcemanager.address.rm2</name>
  <value>worker2:23140</value>
</property>
<property>
  <name>yarn.resourcemanager.scheduler.address.rm2</name>
  <value>worker2:23130</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.https.address.rm2</name>
  <value>worker2:23189</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.address.rm2</name>
  <value>worker2:23188</value>
</property>
<property>
  <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
  <value>worker2:23125</value>
</property>
<property>
  <name>yarn.resourcemanager.admin.address.rm2</name>
  <value>worker2:23141</value>
</property>

<!-- NodeManager -->
<property>
  <description>Address where the localizer IPC is.</description>
  <name>yarn.nodemanager.localizer.address</name>
  <value>0.0.0.0:23344</value>
</property>
<property>
  <description>NM Webapp address.</description>
  <name>yarn.nodemanager.webapp.address</name>
  <value>0.0.0.0:23999</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<!--
  The class key embeds the aux-service name declared above, so it must be
  "mapreduce_shuffle" (underscore), not "mapreduce.shuffle".
-->
<property>
  <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!--
  The pseudo-distributed overrides of yarn.nodemanager.local-dirs and
  yarn.nodemanager.log-dirs (/tmp/pseudo-dist/yarn/local and
  /tmp/pseudo-dist/yarn/log) were removed here: being declared after the
  /data/N/yarn/... values, they replaced them and left the prepared data
  disks unused.
-->
<property>
  <name>mapreduce.shuffle.port</name>
  <value>23080</value>
</property>
PS. 请注意 yarn.resourcemanager.ha.id 这一项:
两台 master 机器上该配置项的 value 必须不同,一台填 rm1,另一台填 rm2。
在启动 NodeManager 服务的机器上,创建本地目录和日志目录并设置属主:
# Create the NodeManager local and log directories on every NodeManager host
# and hand them over to the yarn user. One command per line: the commands
# must not be fused together, otherwise "sudo"/"mkdir"/"chown" are taken as
# path arguments.
# NOTE(review): /data/4/... is created here but is not listed in the
# yarn.nodemanager.local-dirs / log-dirs values of yarn-site.xml; add it there
# if the fourth disk should actually be used.
sudo mkdir -p /data/1/yarn/local /data/2/yarn/local /data/3/yarn/local /data/4/yarn/local
sudo mkdir -p /data/1/yarn/logs /data/2/yarn/logs /data/3/yarn/logs /data/4/yarn/logs
sudo chown -R yarn:yarn /data/1/yarn/local /data/2/yarn/local /data/3/yarn/local /data/4/yarn/local
sudo chown -R yarn:yarn /data/1/yarn/logs /data/2/yarn/logs /data/3/yarn/logs /data/4/yarn/logs
mapred-site.xml
<!--
  mapred-site.xml fragment: run MapReduce on YARN and point clients at the
  JobHistory server on worker3. Paste these <property> elements inside the
  <configuration> element of mapred-site.xml.
-->
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>

<!-- JobHistory server endpoints -->
<property>
  <name>mapreduce.jobhistory.address</name>
  <value>worker3:10020</value>
</property>
<property>
  <name>mapreduce.jobhistory.webapp.address</name>
  <value>worker3:19888</value>
</property>

<!--
  Make sure proxying is enabled for the mapred user.
  NOTE(review): hadoop.proxyuser.* settings are normally read from
  core-site.xml by the NameNode; confirm they take effect from this file.
-->
<property>
  <name>hadoop.proxyuser.mapred.groups</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.mapred.hosts</name>
  <value>*</value>
</property>

<!-- Staging directory and JobHistory locations in HDFS -->
<property>
  <name>yarn.app.mapreduce.am.staging-dir</name>
  <value>/user</value>
</property>
<property>
  <name>mapreduce.jobhistory.done-dir</name>
  <value>/user/history/done</value>
</property>
<property>
  <name>mapreduce.jobhistory.intermediate-done-dir</name>
  <value>/user/history/intermediate</value>
</property>
添加文件夹和配置权限
# Create the JobHistory directories in HDFS (run as the hdfs superuser).
# One command per line: fused commands would turn "sudo ..." into part of
# the preceding path.
sudo -u hdfs hadoop fs -mkdir -p /user/history
sudo -u hdfs hadoop fs -chmod -R 1777 /user/history
sudo -u hdfs hadoop fs -mkdir -p /user/history/done
sudo -u hdfs hadoop fs -chmod -R 750 /user/history/done
sudo -u hdfs hadoop fs -mkdir -p /user/history/intermediate
sudo -u hdfs hadoop fs -chmod -R 1777 /user/history/intermediate
# Recursive chown: done/ and intermediate/ were just created by hdfs, and
# done/ is mode 750, so without -R the mapred user could not use them.
sudo -u hdfs hadoop fs -chown -R mapred:hadoop /user/history

# Home directory for the user that submits jobs (root in this setup).
sudo -u hdfs hadoop fs -mkdir /user/root
sudo -u hdfs hadoop fs -chown root /user/root
作者:阿武z
链接:https://www.jianshu.com/p/05ca2becc29e