ssledz blog

Everything should be made as simple as possible, but no simpler.

Simple Sbt Setup for Spark Project

Below is a simple sbt setup for a Spark application written in Scala.

Directory layout

1
2
3
4
5
6
7
8
9
10
11
find .
.
./build.sbt
./src/main
./src/main/scala
./src/main/scala/pl
./src/main/scala/pl/softech
./src/main/scala/pl/softech/WordCountExample.scala
./src/main/resources
./src/main/resources/log4j.properties
./src/main/resources/words.txt

build.sbt

1
2
3
4
5
6
7
8
9
name := "spark-simple-app"

version := "1.0"

scalaVersion := "2.11.8"

// Single source of truth for the Spark artifact version.
val sparkVersion = "2.1.0"

// %% appends the Scala binary version (here: _2.11) to the artifact name,
// so this resolves to spark-core_2.11:2.1.0.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion
)

log4j.properties

1
2
3
4
5
# Suppress Spark's verbose INFO/WARN logging; only ERRORs reach the console.
log4j.rootCategory=ERROR, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
# Log to stderr so stdout carries only the application's own output.
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Format: date, log level, short logger name, message — e.g.
#   21/03/05 12:00:00 ERROR SparkContext: <message>
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

WordCountExample.scala

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
package pl.softech

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Minimal Spark word-count example.
 *
 * Reads `src/main/resources/words.txt`, counts whitespace-separated words,
 * and prints the (word, count) pairs sorted by descending frequency.
 */
object WordCountExample {

  def main(args: Array[String]): Unit = {

    // local[*] runs Spark in-process using all available cores —
    // convenient for running straight from sbt without a cluster.
    val conf = new SparkConf().setAppName("spark-simple-app").setMaster("local[*]")

    val sc = new SparkContext(conf)

    // try/finally guarantees the context is stopped even if the job fails,
    // releasing the local executor threads and the Spark UI port.
    try {
      val textFile = sc.textFile("src/main/resources/words.txt")

      val counts = textFile
        .flatMap(line => line.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .sortBy(-_._2) // most frequent words first

      // println, not printf: printf would interpret the collected text as a
      // format string, so any word containing '%' would throw or corrupt output.
      println(counts.collect().mkString("\n"))
    } finally {
      sc.stop()
    }
  }

}

Sources can be found here

Comments