Below is a simple sbt setup for a Spark application written in Scala.
Directory layout
find .
.
./build.sbt
./src
./src/main
./src/main/scala
./src/main/scala/pl
./src/main/scala/pl/softech
./src/main/scala/pl/softech/WordCountExample.scala
./src/main/resources
./src/main/resources/log4j.properties
./src/main/resources/words.txt
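The contents of words.txt are not shown here; any whitespace-separated text will do. A minimal hypothetical example (the actual file in the sources may differ):

to be or not to be
that is the question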
build.sbt
name := "spark-simple-app"

version := "1.0"

scalaVersion := "2.11.8"

val sparkVersion = "2.1.0"

libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion
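Spark core is the only dependency this example needs. As an aside: if you were to package the job and submit it to a cluster with spark-submit instead of running it from sbt, the Spark dependency would typically be scoped as provided, since the cluster already supplies the Spark jars. A sketch, not part of the original build:

libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion % "provided"

For running directly from sbt, as in this example, keep the default compile scope shown above.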
log4j.properties
log4j.rootCategory=ERROR, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
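Without this file Spark logs at INFO level, which floods the console; the configuration above routes only ERROR-level messages to stderr, so the program's own output stays readable.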
WordCountExample.scala
package pl.softech

import org.apache.spark.{SparkConf, SparkContext}

object WordCountExample {

  def main(args: Array[String]): Unit = {
    // Run locally, using all available cores
    val conf = new SparkConf().setAppName("spark-simple-app").setMaster("local[*]")
    val sc = new SparkContext(conf)

    val textFile = sc.textFile("src/main/resources/words.txt")

    // Split each line into words, count the occurrences of each word,
    // and sort by count in descending order
    val counts = textFile.flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
      .sortBy(-_._2)

    // println rather than printf: printf would interpret any % in the
    // data as a format specifier
    println(counts.collect().mkString("\n"))

    sc.stop()
  }

}
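Since the master is hard-coded to local[*], the example can be started straight from the project root with sbt run; no spark-submit is needed. For the hypothetical words.txt shown earlier, the output would be (word, count) tuples sorted by descending count, along these lines (ties may appear in either order):

(to,2)
(be,2)
(that,1)
(is,1)
(the,1)
(question,1)
(or,1)
(not,1)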
Sources can be found here.