-
Cats萌萌
你可以用'|'分割你的专栏 像下面导入 spark.implicits._val df = mainDf.select("Column1","Column2").map(x => { val s1 = x.getAsString(0).replaceAll("^.*3_","").split("|"); val s2 = x.getAsString(1).replaceAll("^.*3_","").split("|"); (x.getAsString(0),x.getAsString(1),s2.diff(s1).union(s1.diff(s2)))}).toDF("Column1","Column2","Column3")
-
精慕HU
你也可以通过regexp_replace和udf来达到你的目的。regexp_replace 替换“|” 用“,”和“。* 3_”用“”udf从column2和column1获取column3的值val df1 = Seq(("SAMPLE_OUT_3_APPLE|BANANA|GUAVA|ORANGE" ,"SAMPLE_OUT_3_APPLE|BANANA|GUAVA|GRAPES|ORANGE|BERRY")).toDF("column1","column2") val df2 =df1.columns.foldLeft(df) { (memoDF, colName) => memoDF.withColumn( colName, regexp_replace(regexp_replace(col(colName), "\\|", ","),".*3_",""))}val diff_udf = udf { ( a: String, b: String) => (a.split(",") diff b.split(",")).mkString(",") }df2.withColumn("column3", diff_udf(col("column2"), col("column1"))).show(false)输出:+-------------------------+--------------------------------------+------------+|column1 |column2 |column3 |+-------------------------+--------------------------------------+------------+|APPLE,BANANA,GUAVA,ORANGE|APPLE,BANANA,GUAVA,GRAPES,ORANGE,BERRY|GRAPES,BERRY|+-------------------------+--------------------------------------+------------+
-
慕村9548890
对于Spark >= 2.4您可以使用array_exceptimport spark.implicits._val df = Seq( ("SAMPLE_OUT_3_APPLE|BANANA|GUAVA|ORANGE" ,"SAMPLE_OUT_3_APPLE|BANANA|GUAVA|GRAPES|ORANGE|BERRY")).toDF("column1", "column2")val remove = df.columns.map(column => split(col(column), "3_").getItem(1).as(column))val resultDF = df.select(remove: _*) .withColumn("column1", split($"column1", "\\|")) .withColumn("column2", split($"column2", "\\|")) .withColumn("column3", array_except($"column2", $"column1")) .withColumn("column1", array_except($"column1", $"column3")) .withColumn("column2", array_except($"column2", $"column3"))val convertToString = resultDF.columns.map(column => concat_ws("|", col(column)).as(column))resultDF.select(convertToString: _*).show(false)输出:+-------------------------+-------------------------+------------+|column1 |column2 |column3 |+-------------------------+-------------------------+------------+|APPLE|BANANA|GUAVA|ORANGE|APPLE|BANANA|GUAVA|ORANGE|GRAPES|BERRY|+-------------------------+-------------------------+------------+