Ben Chuanlong Du's Blog

It is never too late to learn.

HashMap in Scala

Comment

HashTable is a trait without a factory method and thus cannot be instantiated. HashMap and HashSet are subclasses that you should use.

https://stackoverflow.com/questions/3648870/scala-using-hashmap-with-a-default-value

In [1]:
import collection.immutable.HashMap

// Build a HashMap with two String -> Int entries; the bare `m` on the next
// line makes the notebook echo the value.
val m = HashMap("x" -> 24, "y" -> 25)
m
m = Map(x -> 24, y -> 25)
Out[1]:
Map(x -> 24, y -> 25)
In [2]:
// Look up the value stored under key "x" (24 in this map).
m("x")
Out[2]:
24
In [4]:
// Iterate with a for-loop, destructuring each entry into key `k` and value `v`.
for ((k,v) <- m) {
    println(k + ": " + v)
}
x: 24
y: 25
In [5]:
// Same iteration via foreach: each entry `e` is a (key, value) tuple, read
// through the tuple accessors `_1` and `_2`.
m.foreach {
    e => println(e._1 + ": " + e._2)
}
x: 24
y: 25
In [10]:
// Runtime type test. The type arguments are erased at runtime, so this only
// verifies that `m` is a Map, not that it holds String keys and Int values.
m.isInstanceOf[Map[String, Int]]
Out[10]:
true
In [13]:
// A map created through the generic `Map` factory with only two entries:
// the HashMap check below evaluates to false for it.
val m = Map("x1" -> 24, "x2" -> 24)
m.isInstanceOf[HashMap[String, Int]]
Out[13]:
false
In [14]:
// The same factory with five entries: here the HashMap check evaluates to true.
val m = Map("x1" -> 24, "x2" -> 24, "x3" -> 100, "x4" -> 200, "x5" -> 300)
m.isInstanceOf[HashMap[String, Int]]
Out[14]:
true
In [16]:
// Inspect the concrete runtime class backing `m`.
m.getClass
Out[16]:
class scala.collection.immutable.HashMap$HashTrieMap

toMap

In [3]:
// Convert a List of (key, value) pairs into a Map.
List(("Ben", 1), ("Lisa", 2)).toMap
Out[3]:
Map(Ben -> 1, Lisa -> 2)
In [4]:
// Same conversion as the previous cell: each tuple becomes one key -> value entry.
List(("Ben", 1), ("Lisa", 2)).toMap
Out[4]:
Map(Ben -> 1, Lisa -> 2)

Seq of Tuple to Map

In [1]:
// toMap works on any Seq of pairs; every key here is unique.
Seq(("Ben", 30), ("Dan", 40), ("Kun", 34)).toMap

Notice that if a key appears more than once, the last occurrence is used.

In [2]:
// "Ben" and "Dan" each appear twice; toMap keeps the last pair seen for each key.
Seq(("Ben", 30), ("Dan", 40), ("Dan", 34), ("Ben", 20)).toMap

Of course, you can define the aggregation behavior yourself. For example, the code below sums the values that belong to the same key.

In [6]:
// Sum the values that share a key.
// A strict `map` over the grouped entries is used instead of `mapValues`:
// `mapValues` yields a lazy view that re-runs the function on every lookup,
// and it is deprecated since Scala 2.13.
Seq(
    ("Ben", 30),
    ("Dan", 40),
    ("Dan", 34),
    ("Ben", 20)
).groupBy(_._1).map {
    // `entries` holds every (name, value) pair recorded for `name`.
    case (name, entries) => name -> entries.map(_._2).sum
}

Count Frequencies of Values

In [4]:
// Count how often each value occurs. A strict `map` replaces `mapValues`,
// which returns a lazy view (recomputed on every access) and is deprecated
// since Scala 2.13.
Seq(1, 1, 2, 2, 3, 1, 2, 2, 4, 3).groupBy(identity).map { case (value, hits) => value -> hits.size }

Iterate a Map

In [5]:
import collection.immutable.HashMap

// Placeholder name -> replacement text (both Strings); the bare `m` echoes the map.
val m = HashMap("x" -> "24", "y" -> "25")
m
In [6]:
// Template containing literal `${x}` / `${y}` placeholders. This is a plain
// string (no `s` interpolator), so nothing is substituted at this point.
val sql = "select * from table where id in (${x}, ${y})"
Out[6]:
select * from table where id in (${x}, ${y})
In [7]:
// Fold over the map, replacing each `${key}` placeholder in `sql` with its value.
// `String.replace` performs the same literal (non-regex) substitution as
// `replaceAllLiterally`, which is deprecated since Scala 2.13.
// NOTE: this is plain text substitution; values are inserted without escaping,
// so it must not be fed untrusted input when the result is run as SQL.
m.foldLeft(sql) { case (res, (key, value)) => res.replace("${" + key + "}", value) }
Out[7]:
select * from table where id in (24, 25)

Join Lists

In [8]:
// Two lists of (name, amount) pairs; "Ben" is the only name present in both.
val l1 = List(
    ("Ben", 1.0),
    ("Lisa", 2.0)
)

val l2 = List(
    ("Ben", 1000.0),
    ("Tiger", 2000.0)
)
Out[8]:
[[(Ben,1000.0), (Tiger,2000.0)]]
In [9]:
/** Inner-joins two lists of (key, value) pairs on their String key.
  *
  * `c2` is first turned into a lookup map, so when a key occurs several
  * times in `c2` only its last value takes part in the join.
  *
  * @param c1 left-hand pairs; their order (and any repeats) is preserved
  * @param c2 right-hand pairs used as the lookup side
  * @return one (left value, right value) pair for every entry of `c1`
  *         whose key also occurs in `c2`; unmatched entries are dropped
  */
def join(c1: List[(String, Double)], c2: List[(String, Double)]): List[(Double, Double)] = {
    val lookup = c2.toMap
    c1.flatMap { case (user, toc) =>
        // A single `get` both tests for the key and fetches its value.
        lookup.get(user).map(matched => (toc, matched))
    }
}
Out[9]:
join: (c1: List[(String, Double)], c2: List[(String, Double)])List[(Double, Double)]
In [10]:
// Join the two lists on name; only "Ben" occurs in both, giving one result pair.
join(l1, l2)
Out[10]:
[[(1.0,1000.0)]]

Comments