package day0108
import java.text.SimpleDateFormat
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
object SparkSqlDemo {

  /**
   * Demo entry point: loads four CSV files (Student, Score, Course, Teacher)
   * into temporary views and runs example Spark SQL queries against them.
   *
   * @param args optional first argument overrides the base directory of the
   *             demo CSV files (defaults to the original hard-coded path).
   */
  def main(args: Array[String]): Unit = {
    // Silence noisy INFO output from Spark and Jetty.
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Base directory of the demo data; overridable from the command line.
    val baseDir = args.headOption.getOrElse("D:\\testdata\\streaming\\spark_sql_test_data")

    // Obtain the SparkSession.
    val spark = SparkSession.builder().master("local").appName("sqlDemo").getOrCreate()
    try {
      // Canonical accessor since Spark 2.x (spark.sqlContext.implicits is the legacy route).
      import spark.implicits._

      // For each table: read raw CSV, map lines onto the case class, register a temp view.
      // Student table
      spark.sparkContext.textFile(baseDir + "\\Student.csv")
        .map(_.split(","))
        .map(x => Student(x(0), x(1), x(2), x(3), x(4)))
        .toDF
        .createOrReplaceTempView("student")
      // Score table
      spark.sparkContext.textFile(baseDir + "\\Score.csv")
        .map(_.split(","))
        .map(x => Score(x(0), x(1), x(2)))
        .toDF
        .createOrReplaceTempView("score")
      // Course table
      spark.sparkContext.textFile(baseDir + "\\Course.csv")
        .map(_.split(","))
        .map(x => Course(x(0), x(1), x(2)))
        .toDF
        .createOrReplaceTempView("course")
      // Teacher table
      spark.sparkContext.textFile(baseDir + "\\Teacher.csv")
        .map(_.split(","))
        .map(x => Teacher(x(0), x(1), x(2), x(3), x(4), x(5)))
        .toDF
        .createOrReplaceTempView("teacher")

      // Example queries kept for reference:
      // spark.sql("select * from student").show()
      // spark.sql("select * from score").show()
      // spark.sql("select * from course").show()
      // spark.sql("select * from teacher").show(false)
      // NOTE: string columns sort lexicographically; cast to a numeric type before ordering.
      // spark.sql("select degree from score order by int(degree) desc ").show()
      // Average score per course; Spark SQL identifiers are case-insensitive.
      // spark.sql("select cnum,avg(cast(degree as int)) from score group by cnum").show()
      // spark.sql("select cnum,avg(degree) from SCORE group by cnum").show()
      // Average score of courses whose id starts with '3' and that have at least 5 students.
      // spark.sql("select cnum,avg(degree) from score where cnum like '3%' group by cnum having count(1) >=5").show()
      // Among students taking more than one course, the one holding the overall top score.
      // spark.sql("select snum,degree from score where" +
      //   " snum in (select t.snum from score t group by t.snum having count(t.snum)>1) " +
      //   "and degree = (select max(degree) from score) ").show()
      // All students born in the same year as student 108.
      // spark.sql("select * from student where " +
      //   "substring(sbirthday,0,4) = " +
      //   "(select substring(sbirthday,0,4) from student where snum = '108')" ).show()
      // Names of teachers whose course has more than 5 enrolled students.
      // spark.sql("select tname from teacher e " +
      //   "join course c on e.tnum = c.tnum " +
      //   "join (select cnum from score group by cnum having count(1)>5) t on t.cnum = c.cnum").show()
      // Scores below the student's own average score.
      // spark.sql("select * from score s where s.degree < (select avg(degree) from score c where s.snum = c.snum)").show()
      // All teachers who teach no course that anyone has a score for.
      // spark.sql("select * from teacher t where t.tnum not in " +
      //   "(select tnum from course c where c.cnum in " +
      //   "(select cnum from score))").show(false)

      // Derive each student's age from the birth year (current year - birth year).
      val currentYear = getDate("yyyy")
      spark.sql(
        s"select sname, (cast($currentYear as int) - cast(substring(sbirthday,0,4) as int)) as age from student"
      ).show()
      spark.sql(s"select sname, $currentYear - substring(sbirthday,0,4) from student").show()
    } finally {
      // Always release the session, even when a load or query above throws.
      spark.stop()
    }
  }

  /**
   * Formats the current system time with the given pattern,
   * e.g. getDate("yyyy") returns the current year as a string.
   *
   * @param time a java.text.SimpleDateFormat pattern
   * @return the current time rendered with that pattern
   */
  def getDate(time: String): String = {
    val df = new SimpleDateFormat(time)
    df.format(System.currentTimeMillis())
  }
}
