手记

spark thrift server HA

在实际使用过程中,我发现所参考帖子中的实现不够严谨,并且存在错误,主要有以下三点:

1、只考虑了在spark thrift server启动时将服务注册到zookeeper中,没有考虑服务失效后的注销问题。这样会导致服务失效后的几分钟内,zookeeper中仍然保留着该服务的注册信息;这时如果客户端通过zookeeper获取到这个已失效的实例并执行sql查询,就会执行失败。

2、在两处启动hiveserver的途径(startWithContext和main)中分别增加了向zookeeper注册服务的代码,造成代码重复。可以考虑从hiveserver本身的start和stop两个方法入手,在一处完成注册和注销。

3、配置文件中hive.server2.thrift.bind.host属性不能设置为0.0.0.0,否则多个spark-thrift-server实例在zookeeper中注册的地址都会是无意义的0.0.0.0。在服务所在的节点上本地测试连接时不会暴露这个问题,因为0.0.0.0确实指向了本机;但当你尝试从其他机器远程连接时,就会发现无法正常连接了。

最后我是这么改的:

注意,以下修改都是针对spark-hive-thriftserver_XXX.jar所做的修改

1、修改org.apache.spark.sql.hive.thriftserver包中的HiveThriftServer2类

/**
 * HiveServer2 subclass used by the Spark Thrift Server.
 *
 * In addition to wiring up the Spark SQL CLI service, this version registers the running
 * server in ZooKeeper from start() and de-registers it from stop(), whenever
 * HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY is enabled in the HiveConf passed to init().
 * Both operations are invoked reflectively on HiveServer2.
 */
private[hive] class HiveThriftServer2(sqlContext: SQLContext)
  extends HiveServer2
    with ReflectedCompositeService {
  // state is tracked internally so that the server only attempts to shut down if it successfully
  // started, and then once only.
  private val started = new AtomicBoolean(false)

  // Configuration captured in init(); start() and stop() read the service-discovery flag from it.
  var hiveConf: HiveConf = _

  override def init(hiveConf: HiveConf) {
    this.hiveConf = hiveConf

    val sparkSqlCliService = new SparkSQLCLIService(this, sqlContext)
    setSuperField(this, "cliService", sparkSqlCliService)
    addService(sparkSqlCliService)

    val thriftCliService = if (isHTTPTransportMode(hiveConf)) {
      new ThriftHttpCLIService(sparkSqlCliService)
    } else {
      new ThriftBinaryCLIService(sparkSqlCliService)
    }

    setSuperField(this, "thriftCLIService", thriftCliService)
    addService(thriftCliService)
    initCompositeService(hiveConf)
  }

  private def isHTTPTransportMode(hiveConf: HiveConf): Boolean = {
    val transportMode = hiveConf.getVar(ConfVars.HIVE_SERVER2_TRANSPORT_MODE)
    transportMode.toLowerCase(Locale.ROOT).equals("http")
  }

  // True when init() has run and dynamic service discovery is switched on in the configuration.
  private def dynamicServiceDiscoveryEnabled: Boolean = {
    hiveConf != null &&
      hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)
  }

  override def start(): Unit = {
    super.start()
    started.set(true)
    // High-availability support: register this server instance in ZooKeeper.
    if (dynamicServiceDiscoveryEnabled) {
      invoke(classOf[HiveServer2], this, "addServerInstanceToZooKeeper",
        classOf[HiveConf] -> this.hiveConf)
    }
  }

  override def stop(): Unit = {
    if (started.getAndSet(false)) {
      try {
        // Close the ZooKeeper connection and de-register this server instance.
        if (dynamicServiceDiscoveryEnabled) {
          invoke(classOf[HiveServer2], this, "removeServerInstanceFromZooKeeper")
        }
      } finally {
        // Always stop the underlying services, even if de-registration threw.
        super.stop()
      }
    }
  }

}

2、修改org.apache.hive.service.server.HiveServer2,增加addServerInstanceToZooKeeper,removeServerInstanceFromZooKeeper两个方法,使第一步的修改能够通过反射调用到

/**
 * StartOptionExecutor: starts HiveServer2.
 * This is the default executor, when no option is specified.
 */static class StartOptionExecutor implements ServerOptionsExecutor {    @Override
    public void execute() {        try {
            startHiveServer2();
        } catch (Throwable t) {
            LOG.fatal("Error starting HiveServer2", t);
            System.exit(-1);
        }
    }
}private String getServerInstanceURI() throws Exception {    if ((thriftCLIService == null) || (thriftCLIService.getServerIPAddress() == null)) {        throw new Exception("Unable to get the server address; it hasn't been initialized yet.");
    }    return getHiveHost() + ":"
            + thriftCLIService.getPortNumber();
}private String getHiveHost() {
    HiveConf hiveConf = thriftCLIService.getHiveConf();
    String hiveHost = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST);    if (hiveHost != null && !hiveHost.isEmpty()) {        return hiveHost;
    } else {        return thriftCLIService.getServerIPAddress().getHostName();
    }
}/**
 * For a kerberized cluster, we dynamically set up the client's JAAS conf.
 *
 * @param hiveConf
 * @return
 * @throws Exception
 */private void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception {    if (UserGroupInformation.isSecurityEnabled()) {
        String principal = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL);        if (principal.isEmpty()) {            throw new IOException("HiveServer2 Kerberos principal is empty");
        }
        String keyTabFile = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);        if (keyTabFile.isEmpty()) {            throw new IOException("HiveServer2 Kerberos keytab is empty");
        }        // Install the JAAS Configuration for the runtime
        Utils.setZookeeperClientKerberosJaasConfig(principal, keyTabFile);
    }
}/**
 * ACLProvider for providing appropriate ACLs to CuratorFrameworkFactory
 */private final ACLProvider zooKeeperAclProvider = new ACLProvider() {    @Override
    public List<ACL> getDefaultAcl() {
        List<ACL> nodeAcls = new ArrayList<ACL>();        if (UserGroupInformation.isSecurityEnabled()) {            // Read all to the world
            nodeAcls.addAll(ZooDefs.Ids.READ_ACL_UNSAFE);            // Create/Delete/Write/Admin to the authenticated user
            nodeAcls.add(new ACL(ZooDefs.Perms.ALL, ZooDefs.Ids.AUTH_IDS));
        } else {            // ACLs for znodes on a non-kerberized cluster
            // Create/Read/Delete/Write/Admin to the world
            nodeAcls.addAll(ZooDefs.Ids.OPEN_ACL_UNSAFE);
        }        return nodeAcls;
    }    @Override
    public List<ACL> getAclForPath(String path) {        return getDefaultAcl();
    }
};private void setDeregisteredWithZooKeeper(boolean deregisteredWithZooKeeper) {    this.deregisteredWithZooKeeper = deregisteredWithZooKeeper;
}/**
 * The watcher class which sets the de-register flag when the znode corresponding to this server
 * instance is deleted. Additionally, it shuts down the server if there are no more active client
 * sessions at the time of receiving a 'NodeDeleted' notification from ZooKeeper.
 */private class DeRegisterWatcher implements Watcher {    @Override
    public void process(WatchedEvent event) {        if (event.getType().equals(Watcher.Event.EventType.NodeDeleted)) {            if (znode != null) {                try {
                    znode.close();
                    LOG.warn("This HiveServer2 instance is now de-registered from ZooKeeper. "
                            + "The server will be shut down after the last client sesssion completes.");
                } catch (IOException e) {
                    LOG.error("Failed to close the persistent ephemeral znode", e);
                } finally {
                    HiveServer2.this.setDeregisteredWithZooKeeper(true);                    // If there are no more active client sessions, stop the server
                    if (cliService.getSessionManager().getOpenSessionCount() == 0) {
                        LOG.warn("This instance of HiveServer2 has been removed from the list of server "
                                + "instances available for dynamic service discovery. "
                                + "The last client session has ended - will shutdown now.");
                        HiveServer2.this.stop();
                    }
                }
            }
        }
    }
} 

/**
     * Adds a server instance to ZooKeeper as a znode.
     *
     * @param hiveConf
     * @throws Exception
     */
    private void addServerInstanceToZooKeeper(HiveConf hiveConf) throws Exception {
        String zooKeeperEnsemble = ZooKeeperHiveHelper.getQuorumServers(hiveConf);
        String rootNamespace = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE);
        String instanceURI = getServerInstanceURI();
        setUpZooKeeperAuth(hiveConf);        int sessionTimeout =
                (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT,
                        TimeUnit.MILLISECONDS);        int baseSleepTime =
                (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME,
                        TimeUnit.MILLISECONDS);        int maxRetries = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_MAX_RETRIES);        // Create a CuratorFramework instance to be used as the ZooKeeper client
        // Use the zooKeeperAclProvider to create appropriate ACLs
        zooKeeperClient =
                CuratorFrameworkFactory.builder().connectString(zooKeeperEnsemble)
                        .sessionTimeoutMs(sessionTimeout).aclProvider(zooKeeperAclProvider)
                        .retryPolicy(new ExponentialBackoffRetry(baseSleepTime, maxRetries)).build();
        zooKeeperClient.start();        // Create the parent znodes recursively; ignore if the parent already exists.
        try {
            zooKeeperClient.create().creatingParentsIfNeeded().withMode(CreateMode.PERSISTENT)
                    .forPath(ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace);
            LOG.info("Created the root name space: " + rootNamespace + " on ZooKeeper for HiveServer2");
        } catch (KeeperException e) {            if (e.code() != KeeperException.Code.NODEEXISTS) {
                LOG.fatal("Unable to create HiveServer2 namespace: " + rootNamespace + " on ZooKeeper", e);                throw e;
            }
        }        // Create a znode under the rootNamespace parent for this instance of the server
        // Znode name: serverUri=host:port;version=versionInfo;sequence=sequenceNumber
        try {
            String pathPrefix =
                    ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace
                            + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + "serverUri=" + instanceURI + ";"
                            + "version=" + HiveVersionInfo.getVersion() + ";" + "sequence=";//            String znodeData = "";//            if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_PUBLISH_CONFIGS)) {//                // HiveServer2 configs that this instance will publish to ZooKeeper,//                // so that the clients can read these and configure themselves properly.//                Map<String, String> confsToPublish = new HashMap<String, String>();//                addConfsToPublish(hiveConf, confsToPublish);//                // Publish configs for this instance as the data on the node//                znodeData = Joiner.on(';').withKeyValueSeparator("=").join(confsToPublish);//            } else {//                znodeData = instanceURI;//            }
            byte[] znodeDataUTF8 = instanceURI.getBytes(Charset.forName("UTF-8"));
            znode =                    new PersistentEphemeralNode(zooKeeperClient,
                            PersistentEphemeralNode.Mode.EPHEMERAL_SEQUENTIAL, pathPrefix, znodeDataUTF8);
            znode.start();            // We'll wait for 120s for node creation
            long znodeCreationTimeout = 120;            if (!znode.waitForInitialCreate(znodeCreationTimeout, TimeUnit.SECONDS)) {                throw new Exception("Max znode creation wait time: " + znodeCreationTimeout + "s exhausted");
            }
            setDeregisteredWithZooKeeper(false);
            znodePath = znode.getActualPath();            // Set a watch on the znode
            if (zooKeeperClient.checkExists().usingWatcher(new DeRegisterWatcher()).forPath(znodePath) == null) {                // No node exists, throw exception
                throw new Exception("Unable to create znode for this HiveServer2 instance on ZooKeeper.");
            }
            LOG.info("Created a znode on ZooKeeper for HiveServer2 uri: " + instanceURI);
        } catch (Exception e) {
            LOG.fatal("Unable to create a znode for this server instance", e);            if (znode != null) {
                znode.close();
            }            throw (e);
        }
    }    private void removeServerInstanceFromZooKeeper() throws Exception {
        setDeregisteredWithZooKeeper(true);        if (znode != null) {
            znode.close();
        }
        zooKeeperClient.close();
        LOG.info("Server instance removed from ZooKeeper.");
    }

另外,如果spark-thrift-server服务可能被集群外的客户端调用,而这些客户端无法通过hostname解析并连接到服务,就需要把zookeeper中的注册信息改为ip

主要修改就是HiveServer2类中的getServerInstanceURI方法

private String getServerInstanceURI() throws Exception {    if ((thriftCLIService == null) || (thriftCLIService.getServerIPAddress() == null)) {        throw new Exception("Unable to get the server address; it hasn't been initialized yet.");
    }    return getHiveHost() + ":"
            + thriftCLIService.getPortNumber();
}private String getHiveHost() {
    HiveConf hiveConf = thriftCLIService.getHiveConf();
    String hiveHost = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST);    if (hiveHost != null && !hiveHost.isEmpty()) {        return hiveHost;
    } else {        return thriftCLIService.getServerIPAddress().getHostName();
    }
}



作者:ron_yang
链接:https://www.jianshu.com/p/eaf615284f67


0人推荐
随时随地看视频
慕课网APP