Fix PySpark interpreter paths and enable Kerberos for the Spark history server

This commit is contained in:
Filippo Berto 2022-03-30 09:35:53 +02:00
parent 86a6fbac76
commit 52047104cc

View file

@ -12,7 +12,7 @@ let
extraPythonPackages = pysparkPackageSelector pkgs.python3.pkgs;
};
sparkConfDir = pkgs.stdenv.mkDerivation {
name = "spark-config";
name = "spark-conf";
dontUnpack = true;
installPhase = ''
# source standard environment
@ -36,8 +36,8 @@ let
export JAVA_HOME="${pkgs.jdk8}"
export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
export PYSPARK_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}:"
export PYSPARK_DRIVER_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}:"
export PYSPARK_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYSPARK_DRIVER_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
export HADOOP_CONF_DIR="${hadoopConfDir}"
export SPARKR_R_SHELL="${pkgs.R}/bin/R"
@ -45,11 +45,17 @@ let
STOP
cat > $out/spark-defaults.conf <<- STOP
spark.eventLog.enabled true
spark.eventLog.dir hdfs://localhost:/logs/spark
spark.history.fs.logDirectory hdfs://localhost:/logs/spark
# spark.yarn.keytab ${keytab_path}
# spark.yarn.principal spark/my.engine@MY.ENGINE
spark.eventLog.enabled true
spark.eventLog.dir hdfs://localhost:/logs/spark
spark.history.fs.logDirectory hdfs://localhost:/logs/spark
# spark.yarn.keytab ${keytab_path}
# spark.yarn.principal spark/my.engine@MY.ENGINE
spark.history.ui.acls.enable true
spark.history.kerberos.enabled true
spark.history.kerberos.keytab ${keytab_path}
spark.history.kerberos.principal spark/my.engine@MY.ENGINE
spark.yarn.appMasterEnv.PYSPARK_PYTHON ${pysparkEnv.outPath}/bin/${pysparkEnv.executable}
spark.yarn.appMasterEnv.PYTHONPATH ${pysparkEnv.outPath}/lib/${pysparkEnv.executable}/site-packages
STOP
'';
};
@ -174,7 +180,7 @@ in
namenode = { enable = true; formatOnInit = true; restartIfChanged = true; };
datanode = { enable = true; restartIfChanged = true; };
journalnode = { enable = true; restartIfChanged = true; };
zkfc = { enable = true; restartIfChanged = true; };
zkfc = { enable = false; restartIfChanged = true; }; # zkfc (presumably the ZooKeeper failover controller) is disabled: this deployment does not use a High Availability setup
httpfs = { enable = true; restartIfChanged = true; };
};
yarn = {