Using Temporary Columns in Spark

In [2]:
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[2]")
    .appName("Nested")
    .getOrCreate()
spark
Out[2]:
org.apache.spark.sql.SparkSession@74a7255c
In [8]:
import spark.implicits._
import org.apache.spark.sql.functions._
Out[8]:
org.apache.spark.sql.SparkSession$implicits$@1cdc4527
In [6]:
val df = Seq(
  (8, "bat"),
  (64, "mouse"),
  (-27, "horse")
).toDF("number", "words")
df.show
+------+-----+
|number|words|
+------+-----+
|     8|  bat|
|    64|mouse|
|   -27|horse|
+------+-----+
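
As a quick sanity check, the inferred schema can be inspected with printSchema (output omitted here):

// optional check: a Seq of (Int, String) tuples gives an integer column
// "number" and a string column "words"
df.printSchema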

In [9]:
df.withColumn("n2", {
    val tempCol = df("number") / 4
    when(tempCol > 0, tempCol).otherwise(0)
}).show
+------+-----+----+
|number|words|  n2|
+------+-----+----+
|     8|  bat| 2.0|
|    64|mouse|16.0|
|   -27|horse| 0.0|
+------+-----+----+
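
For comparison, here is a sketch of the more verbose alternative: materialize a real temporary column, use it, then drop it. The column name number_div4 is made up for illustration.

// alternative without the block: add a real temporary column and drop it afterwards
df.withColumn("number_div4", df("number") / 4)
    .withColumn("n2", when($"number_div4" > 0, $"number_div4").otherwise(0))
    .drop("number_div4")
    .show

The block approach keeps tempCol entirely local, so no extra column is ever added to the DataFrame and nothing needs to be dropped.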

In [10]:
df.withColumn("n2", {
    val tempCol = $"number" / 4
    when(tempCol > 0, tempCol).otherwise(0)
}).show
+------+-----+----+
|number|words|  n2|
+------+-----+----+
|     8|  bat| 2.0|
|    64|mouse|16.0|
|   -27|horse| 0.0|
+------+-----+----+
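
The block is only there for scoping. A Spark Column is an ordinary expression object, so the temporary column can just as well live in a plain val outside the withColumn call; the name quarter below is arbitrary.

// equivalent: bind the temporary expression to a regular val
val quarter = $"number" / 4
df.withColumn("n2", when(quarter > 0, quarter).otherwise(0)).show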

In [11]:
df.withColumn("n2", {
    val tempCol = $"number" / 4
    when(tempCol > 0, tempCol).otherwise(0)
}).withColumn("n3", {
    val tempCol = $"n2" / 2
    when(tempCol < 2, tempCol + 1).otherwise(tempCol)
}).show
+------+-----+----+---+
|number|words|  n2| n3|
+------+-----+----+---+
|     8|  bat| 2.0|2.0|
|    64|mouse|16.0|8.0|
|   -27|horse| 0.0|1.0|
+------+-----+----+---+
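
When the same temporary-column pattern recurs, it can be factored into a small function returning a Column. A minimal sketch under the same imports; clipLow is a hypothetical helper name.

import org.apache.spark.sql.Column

// hypothetical helper: clip a column expression at a lower bound lo
def clipLow(c: Column, lo: Double): Column = when(c > lo, c).otherwise(lo)

df.withColumn("n2", clipLow($"number" / 4, 0))
    .withColumn("n3", {
        val tempCol = $"n2" / 2
        when(tempCol < 2, tempCol + 1).otherwise(tempCol)
    })
    .show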
