
Setting Up CDH on Alibaba Cloud (Step 2: Starting YARN)

YARN HA documentation

Configure YARN to allocate cluster resources

Documentation link

yarn-site.xml configuration

<!-- Base Configs -->
<property>
  <description>Classpath for typical applications.</description>
  <name>yarn.application.classpath</name>
  <value>
    $HADOOP_CONF_DIR,
    $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
    $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
    $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
    $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
  </value>
</property>
<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
<property>
  <name>yarn.nodemanager.local-dirs</name>
  <value>file:///data/1/yarn/local,file:///data/2/yarn/local,file:///data/3/yarn/local</value>
</property>
<property>
  <name>yarn.nodemanager.log-dirs</name>
  <value>file:///data/1/yarn/logs,file:///data/2/yarn/logs,file:///data/3/yarn/logs</value>
</property>
<property>
  <name>yarn.nodemanager.remote-app-log-dir</name>
  <value>/var/log/hadoop-yarn/apps</value>
</property>


<!-- HA Configs -->
  <property>
    <name>yarn.resourcemanager.connect.retry-interval.ms</name>
    <value>2000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>xiwu-yarn-rm-cluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.id</name>
    <value>rm1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.allow-undeclared-pools</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.user-as-default-queue</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.preemption</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
    <value>0.8</value>
  </property>

  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>worker1:2181,worker2:2181,worker3:2181</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
    <value>5000</value>
  </property>

  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>worker1:23140</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>worker1:23130</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>worker1:23189</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>worker1:23188</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>worker1:23125</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>worker1:23141</value>
  </property>

  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>worker2:23140</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>worker2:23130</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>worker2:23189</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>worker2:23188</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>worker2:23125</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>worker2:23141</value>
  </property>
  <property>
    <description>Address where the localizer IPC is.</description>
    <name>yarn.nodemanager.localizer.address</name>
    <value>0.0.0.0:23344</value>
  </property>
  <property>
    <description>NM Webapp address.</description>
    <name>yarn.nodemanager.webapp.address</name>
    <value>0.0.0.0:23999</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>mapreduce.shuffle.port</name>
    <value>23080</value>
  </property>

PS: Note that yarn.resourcemanager.ha.id must be set to a different value on each of the two master machines: rm1 on one and rm2 on the other.
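For example, the only difference in the second master's yarn-site.xml is this block:

  <property>
    <name>yarn.resourcemanager.ha.id</name>
    <value>rm2</value>
  </property>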

On each machine that runs the NodeManager service, create the local and log directories (adjust the /data/N list to match the disks referenced in yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs):

sudo mkdir -p /data/1/yarn/local /data/2/yarn/local /data/3/yarn/local /data/4/yarn/local
sudo mkdir -p /data/1/yarn/logs /data/2/yarn/logs /data/3/yarn/logs /data/4/yarn/logs
sudo chown -R yarn:yarn /data/1/yarn/local /data/2/yarn/local /data/3/yarn/local /data/4/yarn/local
sudo chown -R yarn:yarn /data/1/yarn/logs /data/2/yarn/logs /data/3/yarn/logs /data/4/yarn/logs
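Since log aggregation is enabled and yarn.nodemanager.remote-app-log-dir points to /var/log/hadoop-yarn/apps, the parent directory should also exist in HDFS. A minimal sketch following the usual CDH layout (the yarn:mapred ownership is an assumption; adjust to your environment):

sudo -u hdfs hadoop fs -mkdir -p /var/log/hadoop-yarn
sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn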

mapred-site.xml

<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>

<property>
  <name>mapreduce.jobhistory.address</name>
  <value>worker3:10020</value>
</property>

<property>
  <name>mapreduce.jobhistory.webapp.address</name>
  <value>worker3:19888</value>
</property>

<property>
  <!--make sure proxying is enabled for the mapred user-->
  <name>hadoop.proxyuser.mapred.groups</name>
  <value>*</value>
</property>

<property>
  <!--make sure proxying is enabled for the mapred user-->
  <name>hadoop.proxyuser.mapred.hosts</name>
  <value>*</value>
</property>

<property>
  <!--configure staging directory-->
  <name>yarn.app.mapreduce.am.staging-dir</name>
  <value>/user</value>
</property>

<property>
  <name>mapreduce.jobhistory.done-dir</name>
  <value>/user/history/done</value>
</property>

<property>
  <name>mapreduce.jobhistory.intermediate-done-dir</name>
  <value>/user/history/intermediate</value>
</property>

Create the directories in HDFS and set their permissions:

sudo -u hdfs hadoop fs -mkdir -p /user/history
sudo -u hdfs hadoop fs -chmod -R 1777 /user/history
sudo -u hdfs hadoop fs -mkdir -p /user/history/done
sudo -u hdfs hadoop fs -chmod -R 750 /user/history/done
sudo -u hdfs hadoop fs -mkdir -p /user/history/intermediate
sudo -u hdfs hadoop fs -chmod -R 1777 /user/history/intermediate
sudo -u hdfs hadoop fs -chown mapred:hadoop /user/history
sudo -u hdfs hadoop fs -mkdir /user/root
sudo -u hdfs hadoop fs -chown root /user/root
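With the configuration and directories in place, the daemons can be started. A minimal sketch, assuming a CDH package install (init scripts named hadoop-yarn-resourcemanager, hadoop-yarn-nodemanager, hadoop-mapreduce-historyserver) and the hostnames used above; adapt to your own installation:

# On worker1 and worker2 (rm1 / rm2)
sudo service hadoop-yarn-resourcemanager start

# On every NodeManager machine
sudo service hadoop-yarn-nodemanager start

# On worker3, the JobHistory server configured in mapred-site.xml
sudo service hadoop-mapreduce-historyserver start

# Check which ResourceManager is active (rm-ids from yarn-site.xml)
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2

# Optional smoke test, run as a user that has an HDFS home directory
# (e.g. root, created above); the jar path assumes a CDH package install
hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 2 100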



Author: 阿武z
Link: https://www.jianshu.com/p/05ca2becc29e

