高级sql记录:

2022年6月19日21:47:41 发表评论 94 views

package day0108


import java.text.SimpleDateFormat

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession

object SparkSqlDemo {
  def main(args: Array[String]): Unit = {

    //去除无用INFO
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    //获取sparksession
    val spark = SparkSession.builder().master("local").appName("sqlDemo").getOrCreate()
    //读取数据,表结构映射,生成DF,注册表
    import spark.sqlContext.implicits._
    //创建学生表
    spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Student.csv").
      map(_.split(","))
      .map(x => Student(x(0),x(1),x(2),x(3),x(4)))
      .toDF
      .createOrReplaceTempView("student")

    //创建成绩表
    spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Score.csv").
      map(_.split(","))
      .map(x => Score(x(0),x(1),x(2)))
      .toDF
      .createOrReplaceTempView("score")

    //创建课程表
    spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Course.csv").
      map(_.split(","))
      .map(x => Course(x(0),x(1),x(2)))
      .toDF
      .createOrReplaceTempView("course")
    //创建教师表
    spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Teacher.csv").
      map(_.split(","))
      .map(x => Teacher(x(0),x(1),x(2),x(3),x(4),x(5)))
      .toDF
      .createOrReplaceTempView("teacher")

//    spark.sql("select * from student").show()
//    spark.sql("select * from score").show()
//    spark.sql("select * from course").show()
//    spark.sql("select * from teacher").show(false)



    //注意:排序使用String类型按照字典顺序排序,需要进行类型转换
//    spark.sql("select degree from score order by int(degree) desc ").show()

    //查询没门课程的平均成绩;spark-sql对于大小写也是不敏感的
//    spark.sql("select cnum,avg(cast(degree as int))  from score group by cnum").show()
//    spark.sql("select cnum,avg(degree)  from SCORE group by cnum").show()


    //查询score中至少有5名学生选课,并且课程编号是3开头的课程的平均分
//    spark.sql("select cnum,avg(degree) from score where cnum like '3%' group by cnum having count(1) >=5").show()

    //查询score表中选择多门课程的同学,最高分的学生
//    spark.sql("select snum,degree from score where" +
//      " snum in (select t.snum from score t group by t.snum having count(t.snum)>1) " +
//      "and degree = (select max(degree) from score) ").show()


    //查询学生的学号为108的同年的所有学生
//    spark.sql("select * from student where " +
//      "substring(sbirthday,0,4) = " +
//      "(select substring(sbirthday,0,4) from student where snum = '108')" ).show()

    //查询选修课程人数大于5,的课程的教师的姓名
//    spark.sql("select tname from teacher e " +
//      "join course c on e.tnum = c.tnum " +
//      "join (select cnum from score group by cnum having count(1)>5) t on t.cnum = c.cnum").show()

    //查询成绩比平均成绩低的学生成绩
//    spark.sql("select * from score s where s.degree < (select avg(degree) from score c where s.snum = c.snum)").show()

    //查询所有没有讲课的教师
//    spark.sql("select * from teacher t where t.tnum not in " +
//      "(select tnum from course c where c.cnum in " +
//      "(select cnum from score))").show(false)

    //查询学生表的年龄

    spark.sql("select sname , " +
          "(" +
          "cast("+getDate("yyyy")+" as int) " +
          "- " +
          "cast(substring(sbirthday,0,4) as int) " +
          ") as age from student").show()

    spark.sql("select sname ,"+getDate("yyyy")+" - substring(sbirthday,0,4) from student").show()


    spark.stop()


  }

  //获取当前时间
  def getDate(time:String): String ={
    val now:Long = System.currentTimeMillis()
    val df:SimpleDateFormat = new SimpleDateFormat(time)
    df.format(now)
  }

}

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: