上传文件到hdfs:
hadoop fs -put /root/people.json /
其中 /root/people.json 是本地文件路径(源文件),
/ 是 HDFS 上的目标路径(此处为根目录)。
# Basic PySpark SQL walkthrough: create a session, load a JSON file into a
# DataFrame, run a few DataFrame operations, then query it through SQL.
from pyspark.sql import SparkSession

# Build a new SparkSession, or reuse the active one if it already exists.
# Parentheses replace the backslash continuations so the chain cannot be
# broken by stray trailing whitespace.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL basic example")
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)

# ------------------
# Load the JSON file into a DataFrame.
# NOTE(review): "/people.json" is resolved against the cluster's default
# filesystem -- presumably HDFS root, where the file was uploaded; confirm.
df = spark.read.json("/people.json")
df.show()

# Print the inferred schema.
df.printSchema()

# Show only the "name" column.
df.select("name").show()

# Show "name" together with "age" incremented by 1.
df.select(df['name'], df['age'] + 1).show()

# Show only the rows whose "age" is greater than 21.
df.filter(df['age'] > 21).show()

# Show the number of rows for each distinct "age".
df.groupBy("age").count().show()

# --------------------------------
# Register the DataFrame as the temporary view "people" so that it can be
# queried with SQL, then run a query against it and show the result.
df.createOrReplaceTempView("people")
sqlDF = spark.sql("SELECT * FROM people")
sqlDF.show()