spark sql join

Contents

Join types

Overloads of join

Join examples

inner join

outer join & full join & fullouter join

left join & leftouter join

right join & rightouter join

leftsemi join

leftanti join

cross join


Join types

When join is called with a string join type, the string is matched against the supported JoinType values:

// JoinType: matching the join-type string
case "inner" => Inner
case "outer" | "full" | "fullouter" => FullOuter
case "leftouter" | "left" => LeftOuter
case "rightouter" | "right" => RightOuter
case "leftsemi" => LeftSemi
case "leftanti" => LeftAnti
case "cross" => Cross

Overloads of join

// no join condition: inner join of the two plans (effectively a Cartesian product until filtered)
def join(right: Dataset[_]): DataFrame = withPlan {
  Join(logicalPlan, right.logicalPlan, joinType = Inner, None)
}
// join on a single column
def join(right: Dataset[_], usingColumn: String): DataFrame = {
  join(right, Seq(usingColumn))
}
// join on multiple columns
def join(right: Dataset[_], usingColumns: Seq[String]): DataFrame = {
  join(right, usingColumns, "inner")
}
// join on the given columns with an explicit join type
def join(right: Dataset[_], usingColumns: Seq[String], joinType: String): DataFrame = {
....
}
// joinWith without an explicit join type, defaults to inner
def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)]
// joinWith with an explicit join type
def joinWith[U](other: Dataset[U], condition: Column, joinType: String): Dataset[(T, U)]
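
To illustrate the difference, here is a minimal sketch using the df and df1 DataFrames defined in the next section: join flattens both sides into a single row, while joinWith keeps the two sides separate and returns a Dataset of pairs.

// join: one flat row containing the columns of both sides
val flat = df.join(df1, df("name") === df1("name1"))
// joinWith: a Dataset[(Row, Row)], each element is a (left row, right row) pair
val pairs = df.joinWith(df1, df("name") === df1("name1"), "inner")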


Join examples

import spark.implicits._

val df = spark.createDataset(Seq(("tom", 21, 19), ("jerry", 31, 19), ("jack", 32, 18)))
  .toDF("name", "age", "salary")
val df1 = spark.createDataset(Seq(("tom", "a"), ("jerry", "b"), ("tony", "d")))
  .toDF("name1", "grade")

inner join

This is the default: when no type is specified the join is an inner join, which returns only the rows that match on both sides.

df.join(df1).where($"name"===$"name1").show()
df.join(df1,df("name")===df1("name1")).show()
//rename the join column so both DataFrames share the same name; the joined result then contains the name column only once
val df2=df1.withColumnRenamed("name1","name")
df.join(df2,"name").show()
df.join(df2,Seq("name")).show()
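
For reference, on the sample data df.join(df2, "name") should print something like the following (row order may vary):

// +-----+---+------+-----+
// | name|age|salary|grade|
// +-----+---+------+-----+
// |  tom| 21|    19|    a|
// |jerry| 31|    19|    b|
// +-----+---+------+-----+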

outer join & full join & fullouter join

An outer (full) join returns all rows from both sides; columns with no match on the other side are null.

df.join(df2,Seq("name"),"outer").show()
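
On the sample data the full outer join should contain all four names, with nulls where one side has no match (row order may vary):

// +-----+----+------+-----+
// | name| age|salary|grade|
// +-----+----+------+-----+
// |  tom|  21|    19|    a|
// |jerry|  31|    19|    b|
// | jack|  32|    18| null|
// | tony|null|  null|    d|
// +-----+----+------+-----+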

left join & leftouter join

A left (outer) join returns all rows from the left side; right-side columns are null where there is no match.

df.join(df2,Seq("name"),"left").show()
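
Expected result on the sample data: all three rows of df, with a null grade for jack (row order may vary):

// +-----+---+------+-----+
// | name|age|salary|grade|
// +-----+---+------+-----+
// |  tom| 21|    19|    a|
// |jerry| 31|    19|    b|
// | jack| 32|    18| null|
// +-----+---+------+-----+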

right join & rightouter join

A right (outer) join returns all rows from the right side; left-side columns are null where there is no match.

df.join(df2,Seq("name"),"right").show()
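
Expected result on the sample data: all three rows of df2, with null age and salary for tony (row order may vary):

// +-----+----+------+-----+
// | name| age|salary|grade|
// +-----+----+------+-----+
// |  tom|  21|    19|    a|
// |jerry|  31|    19|    b|
// | tony|null|  null|    d|
// +-----+----+------+-----+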

leftsemi join

A left semi join behaves like an IN subquery: it returns the left-side rows that have a match on the right, and only the left-side columns.

df.join(df2,Seq("name"),"leftsemi").show()
//select * from df where name in (select name from df2);
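
Only the columns of df are returned, and only for the names that also exist in df2. Expected result on the sample data (row order may vary):

// +-----+---+------+
// | name|age|salary|
// +-----+---+------+
// |  tom| 21|    19|
// |jerry| 31|    19|
// +-----+---+------+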

df2.join(df,Seq("name"),"leftsemi").show()
//select * from df2 where name in (select name from df);
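
Same idea with the sides swapped: only the columns of df2, for the names that also exist in df:

// +-----+-----+
// | name|grade|
// +-----+-----+
// |  tom|    a|
// |jerry|    b|
// +-----+-----+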

leftanti join

A left anti join behaves like a NOT IN subquery: it returns the left-side rows that have no match on the right, again with only the left-side columns.

df.join(df2,Seq("name"),"leftanti").show()
//select * from df where name not in (select name from df2);
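
Expected result on the sample data: only jack, the one row of df whose name does not appear in df2:

// +----+---+------+
// |name|age|salary|
// +----+---+------+
// |jack| 32|    18|
// +----+---+------+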

cross join

The Cartesian product: every row on the left is combined with every row on the right.

df.crossJoin(df2).show()
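
On the sample data (3 rows on each side), the result therefore has 3 * 3 = 9 rows:

// df.crossJoin(df2).count() == 9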
