You can do it with the approach below:

import org.apache.spark.sql.functions._
import spark.implicits._

val input_rdd = spark.sparkContext.parallelize(List(
  ("a1", "b1", "c1"),
  ("a1", "b2", "c1"),
  ("a1", "b1", "c2"),
  ("a2", "b2", "c2")))
val input_df = input_rdd.toDF("Address1", "Address2", "Address3")
input_df.show()

+--------+--------+--------+
|Address1|Address2|Address3|
+--------+--------+--------+
|      a1|      b1|      c1|
|      a1|      b2|      c1|
|      a1|      b1|      c2|
|      a2|      b2|      c2|
+--------+--------+--------+

// Count the occurrences of each value, one column at a time,
// and rename the result columns to a common (ADDRESS, COUNT) schema
// so the three DataFrames can be unioned.
val out_address1_df = input_df.groupBy("Address1")
  .agg(count(input_df("Address1")).as("count_address1"))
  .select(input_df("Address1").as("ADDRESS"), col("count_address1").as("COUNT"))
//out_address1_df.show()

val out_address2_df = input_df.groupBy("Address2")
  .agg(count(input_df("Address2")).as("count_address2"))
  .select(input_df("Address2").as("ADDRESS"), col("count_address2").as("COUNT"))
//out_address2_df.show()

val out_address3_df = input_df.groupBy("Address3")
  .agg(count(input_df("Address3")).as("count_address3"))
  .select(input_df("Address3").as("ADDRESS"), col("count_address3").as("COUNT"))

// unionAll is deprecated since Spark 2.0; union does the same thing.
val output_df = out_address1_df.union(out_address2_df).union(out_address3_df)
output_df.show()

+-------+-----+
|ADDRESS|COUNT|
+-------+-----+
|     a2|    1|
|     a1|    3|
|     b2|    2|
|     b1|    2|
|     c1|    2|
|     c2|    2|
+-------+-----+
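If you have more address columns, you can avoid repeating the per-column boilerplate by folding over the column names instead. A minimal sketch, assuming Spark 2.x and the same input_df built above (countsFor is a hypothetical helper, not part of the Spark API):

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._

// Hypothetical helper: value counts for one column, normalized
// to the same (ADDRESS, COUNT) schema used in the answer above.
def countsFor(df: DataFrame, column: String): DataFrame =
  df.groupBy(column)
    .agg(count(col(column)).as("COUNT"))
    .select(col(column).as("ADDRESS"), col("COUNT"))

// Apply the helper to every column and union the results,
// which generalizes to any number of address columns.
val output_df = input_df.columns
  .map(c => countsFor(input_df, c))
  .reduce(_ union _)
output_df.show()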