Ben Chuanlong Du's Blog

It is never too late to learn.

Use Tablesaw with Kotlin

In [1]:
%%classpath add mvn
tech.tablesaw tablesaw-beakerx 0.36.0
com.jimmoores quandl-tablesaw 2.0.0
In [4]:
import tech.tablesaw.aggregate.AggregateFunctions.*
import tech.tablesaw.api.*
import tech.tablesaw.columns.*
Out[4]:
null

Comments

  1. Tablesaw's usability still lags far behind Python pandas and Spark DataFrame (even though it is currently the best DataFrame implementation for the JVM). I suggest you stick to Spark DataFrame or Python pandas for now.

  2. You can display Tablesaw tables with the BeakerX table display widget by running the following command.

     tech.tablesaw.beakerx.TablesawDisplayer.register()

Column

In [6]:
val numbers = arrayOf(1, 2, 3, 4)
val col = DoubleColumn.create("nc", numbers)
println(col.print())
Column: nc
1
2
3
4

Out[6]:
null
In [7]:
col.get(2)
Out[7]:
3.0
In [10]:
col.multiply(4)
Out[10]:
Double column: nc * 4.0
In [11]:
println(col.multiply(4).print())
Column: nc * 4.0
4
8
12
16

Out[11]:
null
In [13]:
col.isLessThan(3.0)
Out[13]:
Selection of size: 2
In [16]:
val c = col.isLessThan(3.0)
c
Out[16]:
Selection of size: 2
In [18]:
println(col.where(c).print())
Column: nc
1
2

Out[18]:
null
In [20]:
col.where(col.isLessThan(3.0).and(col.isPositive()))
Out[20]:
Double column: nc
In [21]:
println(col.where(col.isLessThan(3.0).and(col.isPositive())).print())
Column: nc
1
2

Out[21]:
null
In [22]:
// Fails as written: none of the imports at the top of the notebook
// (tech.tablesaw.api.*, tech.tablesaw.columns.*, AggregateFunctions.*) bring
// `Selection` into scope, hence the "unresolved reference" error shown below.
// NOTE(review): Selection appears to live in tech.tablesaw.selection - confirm and import it.
col.where(Selection.with(0, 2))
error: unresolved reference: Selection
col.where(Selection.with(0, 2))
          ^
In [ ]:
// `Selection` is not covered by the wildcard imports at the top of the notebook
// (tech.tablesaw.api.*, tech.tablesaw.columns.*), so it has to be imported
// explicitly; without this the cell fails with "unresolved reference: Selection".
import tech.tablesaw.selection.Selection

// Keep only the rows whose indexes fall in the range built by withRange(1, 3).
col.where(Selection.withRange(1, 3))

Table/DataFrame

In [6]:
val flights = Table.read().csv("../../home/media/data/flights14.csv")
Out[6]:
null
In [5]:
flights.structure()
In [6]:
flights.columnNames()
Out[6]:
[year, month, day, dep_time, dep_delay, arr_time, arr_delay, cancelled, carrier, tailnum, flight, origin, dest, air_time, distance, hour, min]
In [7]:
flights.shape()
Out[7]:
39 rows X 17 cols
In [9]:
flights.first(10)
In [10]:
flights.summary()
Out[10]:
Table summary for: flights14.csv
     Column: year     
 Measure   |  Value  |
----------------------
        n  |     39  |
      sum  |  78546  |
     Mean  |   2014  |
      Min  |   2014  |
      Max  |   2014  |
    Range  |      0  |
 Variance  |      0  |
 Std. Dev  |      0  |
    Column: month     
 Measure   |  Value  |
----------------------
        n  |     39  |
      sum  |     39  |
     Mean  |      1  |
      Min  |      1  |
      Max  |      1  |
    Range  |      0  |
 Variance  |      0  |
 Std. Dev  |      0  |
     Column: day      
 Measure   |  Value  |
----------------------
        n  |     39  |
      sum  |     39  |
     Mean  |      1  |
      Min  |      1  |
      Max  |      1  |
    Range  |      0  |
 Variance  |      0  |
 Std. Dev  |      0  |
         Column: dep_time          
 Measure   |        Value         |
-----------------------------------
        n  |                  39  |
      sum  |               54119  |
     Mean  |  1387.6666666666665  |
      Min  |                 553  |
      Max  |                2133  |
    Range  |                1580  |
 Variance  |  197312.91228070174  |
 Std. Dev  |   444.1991808645101  |
         Column: dep_delay         
 Measure   |        Value         |
-----------------------------------
        n  |                  39  |
      sum  |                 609  |
     Mean  |  15.615384615384615  |
      Min  |                 -10  |
      Max  |                 191  |
    Range  |                 201  |
 Variance  |  1612.7165991902837  |
 Std. Dev  |  40.158642895275776  |
         Column: arr_time         
 Measure   |        Value        |
----------------------------------
        n  |                 39  |
      sum  |              62670  |
     Mean  |  1606.923076923077  |
      Min  |                 37  |
      Max  |               2342  |
    Range  |               2305  |
 Variance  |  276612.8623481781  |
 Std. Dev  |  525.9399797963434  |
         Column: arr_delay         
 Measure   |        Value         |
-----------------------------------
        n  |                  39  |
      sum  |                 671  |
     Mean  |    17.2051282051282  |
      Min  |                 -27  |
      Max  |                 185  |
    Range  |                 212  |
 Variance  |  2045.4831309041833  |
 Std. Dev  |   45.22701770959681  |
  Column: cancelled   
 Measure   |  Value  |
----------------------
        n  |     39  |
      sum  |      0  |
     Mean  |      0  |
      Min  |      0  |
      Max  |      0  |
    Range  |      0  |
 Variance  |      0  |
 Std. Dev  |      0  |
   Column: carrier    
 Category  |  Count  |
----------------------
       AA  |     39  |
   Column: tailnum    
 Category  |  Count  |
----------------------
   N3KDAA  |      1  |
   N3KRAA  |      1  |
   N3BSAA  |      1  |
   N3HFAA  |      1  |
   N3BJAA  |      1  |
   N5FJAA  |      1  |
   N328AA  |      1  |
   N3ESAA  |      1  |
   N5CEAA  |      1  |
   N3JWAA  |      1  |
      ...  |    ...  |
   N336AA  |      1  |
   N3BCAA  |      1  |
   N3JMAA  |      1  |
   N3CWAA  |      1  |
   N323AA  |      1  |
   N3EHAA  |      1  |
   N319AA  |      1  |
   N338AA  |      1  |
   N327AA  |      1  |
   N335AA  |      1  |
          Column: flight          
 Measure   |        Value        |
----------------------------------
        n  |                 39  |
      sum  |              13176  |
     Mean  |  337.8461538461538  |
      Min  |                  1  |
      Max  |               1171  |
    Range  |               1170  |
 Variance  |  89952.60728744938  |
 Std. Dev  |  299.9210017445417  |
    Column: origin    
 Category  |  Count  |
----------------------
      JFK  |     18  |
      EWR  |      4  |
      LGA  |     17  |
     Column: dest     
 Category  |  Count  |
----------------------
      STT  |      1  |
      BOS  |      2  |
      DFW  |      3  |
      AUS  |      1  |
      IAH  |      1  |
      SJU  |      1  |
      ORD  |     14  |
      SFO  |      1  |
      SEA  |      1  |
      LAX  |      8  |
      MIA  |      5  |
      PBI  |      1  |
         Column: air_time          
 Measure   |        Value         |
-----------------------------------
        n  |                  39  |
      sum  |                8026  |
     Mean  |  205.79487179487182  |
      Min  |                  35  |
      Max  |                 365  |
    Range  |                 330  |
 Variance  |   9050.535762483129  |
 Std. Dev  |    95.1343038156223  |
         Column: distance          
 Measure   |        Value         |
-----------------------------------
        n  |                  39  |
      sum  |               52212  |
     Mean  |  1338.7692307692312  |
      Min  |                 187  |
      Max  |                2586  |
    Range  |                2399  |
 Variance  |   562719.0242914981  |
 Std. Dev  |   750.1460019832793  |
           Column: hour            
 Measure   |        Value         |
-----------------------------------
        n  |                  39  |
      sum  |                 528  |
     Mean  |  13.538461538461533  |
      Min  |                   5  |
      Max  |                  21  |
    Range  |                  16  |
 Variance  |  19.991902834008094  |
 Std. Dev  |  4.4712305726732655  |
            Column: min            
 Measure   |        Value         |
-----------------------------------
        n  |                  39  |
      sum  |                1319  |
     Mean  |  33.820512820512825  |
      Min  |                   2  |
      Max  |                  57  |
    Range  |                  55  |
 Variance  |  274.04588394062074  |
 Std. Dev  |  16.554331274340885  |

In [8]:
val values = doubleArrayOf(1.0, 2.0, 3.0, 7.0, 9.44242, 11.0)
values
Out[8]:
[1.0, 2.0, 3.0, 7.0, 9.44242, 11.0]
In [9]:
val column = DoubleColumn.create("my_numbers", values)
Out[9]:
null
In [11]:
DoubleColumn.create("col", 0.until(10).map{
    i -> i.toDouble()
})
In [5]:
import tech.tablesaw.api.Table
import tech.tablesaw.api.DoubleColumn

// Build a table named "Table1" with ten columns, column_0 .. column_9.
// Column i holds the ten values 10*i, 10*i + 1, ..., 10*i + 9 as doubles.
val table = Table.create("Table1")

(0 until 10).forEach { i ->
    val values = List(10) { j -> (j + 10 * i).toDouble() }
    table.addColumns(DoubleColumn.create("column_$i", values))
}
// Last expression of the cell: the table rendered as text.
table.print()
Out[5]:
                                                             Table1                                                              
 column_0  |  column_1  |  column_2  |  column_3  |  column_4  |  column_5  |  column_6  |  column_7  |  column_8  |  column_9  |
---------------------------------------------------------------------------------------------------------------------------------
        0  |        10  |        20  |        30  |        40  |        50  |        60  |        70  |        80  |        90  |
        1  |        11  |        21  |        31  |        41  |        51  |        61  |        71  |        81  |        91  |
        2  |        12  |        22  |        32  |        42  |        52  |        62  |        72  |        82  |        92  |
        3  |        13  |        23  |        33  |        43  |        53  |        63  |        73  |        83  |        93  |
        4  |        14  |        24  |        34  |        44  |        54  |        64  |        74  |        84  |        94  |
        5  |        15  |        25  |        35  |        45  |        55  |        65  |        75  |        85  |        95  |
        6  |        16  |        26  |        36  |        46  |        56  |        66  |        76  |        86  |        96  |
        7  |        17  |        27  |        37  |        47  |        57  |        67  |        77  |        87  |        97  |
        8  |        18  |        28  |        38  |        48  |        58  |        68  |        78  |        88  |        98  |
        9  |        19  |        29  |        39  |        49  |        59  |        69  |        79  |        89  |        99  |
In [ ]:

Comments