Spark机器学习--treeAggregate
最近项目不是很忙,把一些东西整理出来,当作笔记,主要是为了养成一个好的习惯。
本文主要介绍MLlib源码中经常出现的treeAggregate
package com.lm.spark.ml
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demonstrates the difference between `RDD.aggregate` and `RDD.treeAggregate`,
 * the multi-level aggregation pattern used throughout Spark MLlib.
 *
 * Both calls compute the same sum of 1..12; the printed `seq`/`opt` trace
 * shows how treeAggregate merges partition results in intermediate stages
 * rather than sending every partial result straight to the driver.
 */
object Treeaggreate {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[*]")
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")

    // 12 elements over 6 partitions, so each partition holds about 2 values.
    val rdd = sc.parallelize(1 to 12).repartition(6)

    // Show which elements landed in which partition before aggregating.
    rdd.mapPartitionsWithIndex((index: Int, it: Iterator[Int]) => {
      Iterator(s" $index : ${it.toList.mkString(",")}")
    }).foreach(println)

    // Plain aggregate: every partition result is merged on the driver.
    val res1 = rdd.aggregate(0)(seq, opt)
    println(s"aggregate result: $res1")
    println("----------")
    // treeAggregate: partial results are combined in a tree of `opt` merges.
    val res2 = rdd.treeAggregate(0)(seq, opt)
    println(s"treeAggregate result: $res2")

    sc.stop()
  }

  /** Within-partition combiner: sums two values, logging each call for tracing. */
  def seq(s1: Int, s2: Int): Int = {
    println("seq " + s1 + ":" + s2)
    s1 + s2
  }

  /** Cross-partition combiner: merges two partial sums, logging each call. */
  def opt(s1: Int, s2: Int): Int = {
    println("opt " + s1 + ":" + s2)
    s1 + s2
  }
}
源码结构:
(此处原有一张treeAggregate源码结构图,摘自他人文章,原文链接如下;)
http://www.mamicode.com/info-detail-2367355.html