From 38b0f255c9ffd39d1777a5c1e758a2bd9689b093 Mon Sep 17 00:00:00 2001 From: Mohamed Ali Jamaoui Date: Tue, 23 Jul 2024 09:44:26 +0100 Subject: [PATCH] Add support for Log4j2 for Spark This change allows users to introduce log configuration using Spark Log4j2. This is important because Spark 3.3 onwards migrated from Log4j 1.x to Log4j 2.x, and Log4j 1.x has reached end of life. Below is a snippet from the [Spark core migration guide](https://spark.apache.org/docs/latest/core-migration-guide.html#upgrading-from-core-32-to-33): > Since Spark 3.3, Spark migrates its log4j dependency from 1.x to 2.x because log4j 1.x has reached end of life and is no longer supported by the community. Vulnerabilities reported after August 2015 against log4j 1.x were not checked and will not be fixed. Users should rewrite original log4j properties files using log4j2 syntax (XML, JSON, YAML, or properties format). Spark rewrites the conf/log4j.properties.template which is included in Spark distribution, to conf/log4j2.properties.template with log4j2 properties format. --- src/smspark/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/smspark/config.py b/src/smspark/config.py index 525ed62..f706783 100644 --- a/src/smspark/config.py +++ b/src/smspark/config.py @@ -89,6 +89,7 @@ class Configuration: _ClassificationData("spark-defaults", "/usr/lib/spark/conf/spark-defaults.conf", conf_serializer), _ClassificationData("spark-env", "/usr/lib/spark/conf/spark-env.sh", env_serializer), _ClassificationData("spark-log4j", "/usr/lib/spark/conf/log4j.properties", properties_serializer), + _ClassificationData("spark-log4j2", "/usr/lib/spark/conf/log4j2.properties", properties_serializer), _ClassificationData("spark-hive-site", "/usr/lib/spark/conf/hive-site.xml", xml_serializer), _ClassificationData("spark-metrics", "/usr/lib/spark/conf/metrics.properties", properties_serializer), _ClassificationData("yarn-env", "/usr/lib/hadoop/etc/hadoop/yarn-env.sh", env_serializer),