The notes on this page are fragmentary, immature thoughts of the author. Please read them with your own judgement!
https://medium.com/mrpowers/manually-creating-spark-dataframes-b14dae906393
Seq.toDF

toDF() provides a concise syntax for creating DataFrames and can be accessed after importing the Spark implicits:

    import spark.implicits._

SparkSession.createDataFrame
%%classpath add mvn
org.apache.spark spark-core_2.11 2.1.1
org.apache.spark spark-sql_2.11 2.1.1
Loading...
import org.apache.spark.sql.{SparkSession, Row}
import org.apache.spark.sql.types._
// Assemble the session configuration first, then get (or reuse) the
// SparkSession — getOrCreate() returns an existing local session if one
// is already running in this JVM.
val sessionBuilder = SparkSession.builder()
  .master("local")
  .appName("createDF example")
  .config("spark.some.config.option", "some-value")
val spark = sessionBuilder.getOrCreate()
spark
import spark.implicits._

val someDF = Seq(
  (8, "bat"),
  (64, "mouse"),
  (-27, "horse")
).toDF("Number", "Words")

someDF.show()
someDF.printSchema()

+------+-----+
|Number|Words|
+------+-----+
| 8| bat|
| 64|mouse|
| -27|horse|
+------+-----+
root
|-- Number: integer (nullable = false)
|-- Words: string (nullable = true)
null
// Sample rows; each Row lines up positionally with someSchema below.
val someData = List(8 -> "bat", 64 -> "mouse", -27 -> "horse").map {
  case (number, word) => Row(number, word)
}

// Explicit schema for the rows above; both columns are nullable.
val someSchema = List(
  StructField("number", IntegerType, nullable = true),
  StructField("word", StringType, nullable = true)
)
// Build a DataFrame explicitly: wrap the Rows in an RDD and pair them with
// a StructType built from someSchema. NOTE(review): the closing parenthesis
// of this call appears on the following (garbled) line of the page.
val someDF = spark.createDataFrame(
spark.sparkContext.parallelize(someData),
StructType(someSchema)
)

Seq(1, 2, 3).toDF.show

createDF() is defined in spark-daria and allows for the following terse syntax.
// val someDF = spark.createDF(
// List(
// (8, "bat"),
// (64, "mouse"),
// (-27, "horse")
// ), List(
// ("number", IntegerType, true),
// ("word", StringType, true)
// )
// )

(Note: the original input is incomplete past this point.)