# Date parsing
# Obtain a Spark session named "df_manage_date" (getOrCreate reuses an
# already-running session if there is one).
spark = SparkSession.builder.appName("df_manage_date").getOrCreate()

# Load the sample CSV: the first row supplies the column names and Spark is
# asked to infer the column types from the data.
df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("file:///home/jovyan/work/sample/date_parsing.csv")
)
df.show()
+----------+------+
|      date|number|
+----------+------+
|2022-01-27|  2723|
|2021-12-29|  1460|
|2022-01-22|  3411|
|2022-01-06|  1527|
|2022-04-21|  3978|
|2022-10-23|  3443|
|2021-12-23|  1641|
|2022-05-31|  1633|
|2021-12-29|  1072|
|2021-12-30|  2936|
|2022-05-04|  2494|
|2022-06-22|  2019|
|2022-04-23|  3804|
|2022-08-04|  1619|
|2022-01-26|  1306|
|2022-09-23|  3918|
|2022-05-27|  3209|
|2022-09-20|  2333|
|2022-07-05|  1861|
|2022-07-18|  3404|
+----------+------+
# Show each calendar component of the `date` column on its own.
df.select(f.year(f.col("date"))).show()
df.select(f.month(f.col("date"))).show()
# Both the day-of-month and the day-of-year columns are labelled "day".
df.select(f.dayofmonth(f.col("date")).alias("day")).show()
df.select(f.dayofyear(f.col("date")).alias("day")).show()

# Average `number` per calendar year. NOTE: `df` is rebound here to the
# aggregated (year, avg) frame, so the row-level data is no longer reachable
# through this name afterwards.
df = (
    df.withColumn("year", f.year(f.col("date")))
    .groupBy("year")
    .mean("number")
    # mean("number") names its result column "avg(number)"; shorten it.
    .withColumnRenamed("avg(number)", "avg")
)

# Display the yearly average formatted with two decimal places.
df.select("year", f.format_number(f.col("avg"), 2).alias("avg")).show()