Ben Chuanlong Du's Blog

It is never too late to learn.

Row Object in Spark

In [1]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.3.1
org.apache.spark spark-sql_2.11 2.3.1
In [2]:
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.Row

// Obtain a SparkSession for this notebook: reuse an existing one if present,
// otherwise create one running locally with 2 worker threads.
val spark = SparkSession
    .builder()
    .appName("Spark Example")
    .master("local[2]")
    // NOTE(review): looks like a placeholder key/value pair — confirm it is needed.
    .config("spark.some.config.option", "some-value")
    .getOrCreate()

import spark.implicits._
Out[2]:
org.apache.spark.sql.SparkSession$implicits$@78b8022e
In [3]:
import org.apache.spark.sql.functions._

// Small in-memory sample of (first name, last name, score) tuples, converted
// to a DataFrame with named columns via the toDF syntax from spark.implicits._.
val df = Seq(
    ("Ben", "Du", 1),
    ("Ben", "Du", 2),
    ("Ben", "Tu", 3),
    ("Ben", "Tu", 4),
    ("Ken", "Xu", 1),
    ("Ken", "Xu", 9)
).toDF("fname", "lname", "score")
// show() only prints the table (a side effect), so it is called with
// explicit parentheses.
df.show()
+-----+-----+-----+
|fname|lname|score|
+-----+-----+-----+
|  Ben|   Du|    1|
|  Ben|   Du|    2|
|  Ben|   Tu|    3|
|  Ben|   Tu|    4|
|  Ken|   Xu|    1|
|  Ken|   Xu|    9|
+-----+-----+-----+

Out[3]:
null

Get the First Row

In [5]:
// first() is an action: it runs a job and returns the first Row of the DataFrame.
df.first()
Out[5]:
[Ben,Du,1]

Get the first element of the first row as a String.

In [6]:
// Fetch the first Row, then read its field at index 0 (column "fname") as a String.
df.first().getString(0)
Out[6]:
Ben

Construct a Row

In [17]:
// Build a Row directly from its field values; the fields may have different
// types (here Int, String, Char and Double).
Row(1, "how", 'i', 2.0)
Out[17]:
[1,how,i,2.0]
In [19]:
// Build a Row from an existing Seq; its elements become the row's fields in order.
Row.fromSeq(Seq(1, "how", true, 2.0))
Out[19]:
[1,how,true,2.0]

Get Elements of a Row

In [20]:
// Sample Row used by the accessor examples that follow:
// index 0 is an Int, 1 a String, 2 a Char and 3 a Double.
val row = Row(1, "how", 'i', 2.0)
// Evaluate `row` as the last expression so the notebook displays it.
row
Out[20]:
[1,how,i,2.0]
In [21]:
// Typed accessor: read the field at index 0 as an Int.
row.getInt(0)
Out[21]:
1
In [24]:
// Pitfall: getAs is generic and needs an explicit type parameter. Without one
// Scala infers Nothing, and the cast fails with the ClassCastException shown
// below. Write row.getAs[Int](0) to get the value 1.
row.getAs(0)
java.lang.ClassCastException: java.lang.Integer cannot be cast to scala.runtime.Nothing$
  ... 52 elided
In [25]:
// Typed accessor: read the field at index 1 as a String.
row.getString(1)
Out[25]:
how
In [26]:
// Same pitfall as with index 0: with no type parameter the result type is
// inferred as Nothing, so the String value cannot be cast. Write
// row.getAs[String](1) to get "how".
row.getAs(1)
java.lang.ClassCastException: java.lang.String cannot be cast to scala.runtime.Nothing$
  ... 52 elided

Comments