package big.data.analyse.sparksql

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

/**
 * Demonstrates Spark SQL built-in aggregate functions: builds a DataFrame
 * from an in-memory RDD of "year,count,url" strings, groups by year,
 * sums the counts, and prints the result sorted by year descending.
 *
 * Created by zhen on 2018/11/8.
 */
object SparkInFuncation {
  def main(args: Array[String]): Unit = { // fix: explicit `: Unit =` (procedure syntax is deprecated)
    val spark = SparkSession.builder()
      .appName("spark内置函数")
      .master("local[2]")
      .getOrCreate()
    try {
      // Sample records in the shape: year, count, url
      val userData = Array(
        "2015,1,www.baidu.com",
        "2016,4,www.google.com",
        "2017,3,www.apache.com",
        "2015,6,www.spark.com",
        "2016,2,www.hadoop.com",
        "2017,8,www.solr.com",
        "2017,4,www.hive.com"
      )
      val sc = spark.sparkContext
      val sqlContext = spark.sqlContext

      // Convert the in-memory array to an RDD[Row] matching the schema below.
      val userDataRDD = sc.parallelize(userData)
      val userDataType = userDataRDD.map { line =>
        val Array(age, id, url) = line.split(",")
        Row(age, id.toInt, url) // id must be Int to match IntegerType in the schema
      }
      val structTypes = StructType(Array(
        StructField("age", StringType, true),
        StructField("id", IntegerType, true),
        StructField("url", StringType, true)
      ))

      // Convert the RDD to a DataFrame with the explicit schema.
      val userDataFrame = sqlContext.createDataFrame(userDataType, structTypes)

      import org.apache.spark.sql.functions._
      userDataFrame
        .groupBy("age")       // group by year
        .agg(sum("id"))       // fix: typed `sum` column function instead of stringly ("id","sum"); same "sum(id)" output column
        .orderBy(desc("age")) // sort years descending
        .show()
    } finally {
      // fix: the original never stopped the session, leaking local Spark resources.
      spark.stop()
    }
  }
}
Answer: