package com.yl.wordcount

import java.io.File

import org.apache.spark.{SparkConf, SparkContext}

import scala.io.Source

/**
 * Word count over a README file, excluding stop words, sorted by
 * descending frequency, written as (count, word) pairs to `outFile`.
 *
 * Stop words are read one-per-line from `tingyongci.txt` if that file
 * exists; otherwise no words are filtered out.
 */
object WordCountStopWords {

  def main(args: Array[String]): Unit = {
    // NOTE(review): master URL, input path, stop-word path and output path
    // are hard-coded; consider taking them from `args`.
    val conf = new SparkConf()
      .setMaster("spark://localhost:7077")
      .setAppName("wordcount")
    // Fix: original bound the config to `cOnf` but passed `conf` here,
    // which did not compile.
    val sc = new SparkContext(conf)

    val outFile = "/Users/admin/spark/sparkoutput"

    // Load the stop-word list, or an empty list when the file is missing.
    // (Original used a null Iterator var and called .toList unconditionally,
    // throwing NPE when the file was absent; it also never closed the Source.)
    val stopWordsFile = new File("/Users/admin/src" + "/tingyongci.txt")
    val stopWordList: List[String] =
      if (stopWordsFile.exists()) {
        val src = Source.fromFile(stopWordsFile)
        try src.getLines.toList
        finally src.close() // avoid leaking the file handle
      } else {
        Nil
      }

    val textFile = sc.textFile("/Users/admin/spark/spark-1.5.1-bin-hadoop2.4/README.md")

    val result = textFile
      .flatMap(_.split(" "))
      .filter(!_.isEmpty)
      .filter(!stopWordList.contains(_))
      .map((_, 1))
      .reduceByKey(_ + _)
      // Swap to (count, word) so sortByKey orders by frequency, descending.
      .map { case (word, count) => (count, word) }
      .sortByKey(false)

    result.saveAsTextFile(outFile)
  }
}