Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!
https://
https://
%%classpath add mvn
org.apache.spark spark-core_2.11 2.1.1
org.apache.spark spark-sql_2.11 2.1.1
Loading...
Loading...
import org.apache.spark.sql.SparkSession
// Creates the SparkSession that every DataFrame example below goes through.
// Each line ends with a trailing dot so the builder chain keeps parsing as a single
// expression when the cell is evaluated line by line in a REPL/notebook.
// - master("local"): presumably runs Spark inside this process rather than on a cluster — confirm in the Spark docs.
// - config("spark.some.config.option", "some-value"): looks like a placeholder showing how extra settings are passed.
// - getOrCreate(): per its name, presumably returns an already-running session if one exists — confirm in the Spark docs.
val spark = SparkSession.
builder().
master("local").
appName("Spark SQL basic example").
config("spark.some.config.option", "some-value").
getOrCreate()
spark
import spark.implicits._
org.apache.spark.sql.SparkSession$implicits$@7e12bd6b
val df = spark.read.json("../../data/people.json")
df.show
+----+-------+
| age|   name|
+----+-------+
|null|Michael|
|  30|   Andy|
|  19| Justin|
+----+-------+
null
Filtering
df.filter($"age" > 21).show()
+---+----+
|age|name|
+---+----+
| 30|Andy|
+---+----+
null
import org.apache.spark.sql.Column
/** Builds the Column expression `column > v`, suitable for passing to `DataFrame.filter`.
  *
  * @param column the column to compare
  * @param v      the integer the column's values are compared against
  * @return a Column representing the comparison `column > v`
  */
def greaterThan(column: Column, v: Int): Column = column > v
import org.apache.spark.sql.Column
greaterThan: (column: org.apache.spark.sql.Column, v: Int)org.apache.spark.sql.Column
df.filter(greaterThan(df("age"), 21)).show
+---+----+
|age|name|
+---+----+
| 30|Andy|
+---+----+
null
df.filter(greaterThan($"age", 21)).show
+---+----+
|age|name|
+---+----+
| 30|Andy|
+---+----+
null
df.filter($"age" === 30).show
+---+----+
|age|name|
+---+----+
| 30|Andy|
+---+----+
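null
// `===` follows SQL semantics: comparing a value with null evaluates to null rather than true, so no row matches.
df.filter($"age" === null).show
+---+----+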
|age|name|
+---+----+
+---+----+
null
df.filter($"age" <=> 30).show
+---+----+
|age|name|
+---+----+
| 30|Andy|
+---+----+
null
// `<=>` is null-safe equality, so it matches the row whose age is null.
df.filter($"age" <=> null).show
+----+-------+
| age|   name|
+----+-------+
|null|Michael|
+----+-------+
null