Fix Spark Python interpreter paths and enable Kerberos for the Spark history server

This commit is contained in:
Filippo Berto 2022-03-30 09:35:53 +02:00
parent 86a6fbac76
commit 52047104cc

View file

@ -12,7 +12,7 @@ let
extraPythonPackages = pysparkPackageSelector pkgs.python3.pkgs; extraPythonPackages = pysparkPackageSelector pkgs.python3.pkgs;
}; };
sparkConfDir = pkgs.stdenv.mkDerivation { sparkConfDir = pkgs.stdenv.mkDerivation {
name = "spark-config"; name = "spark-conf";
dontUnpack = true; dontUnpack = true;
installPhase = '' installPhase = ''
# source standard environment # source standard environment
@ -36,8 +36,8 @@ let
export JAVA_HOME="${pkgs.jdk8}" export JAVA_HOME="${pkgs.jdk8}"
export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}" export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath) export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
export PYSPARK_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}:" export PYSPARK_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYSPARK_DRIVER_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}:" export PYSPARK_DRIVER_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH" export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
export HADOOP_CONF_DIR="${hadoopConfDir}" export HADOOP_CONF_DIR="${hadoopConfDir}"
export SPARKR_R_SHELL="${pkgs.R}/bin/R" export SPARKR_R_SHELL="${pkgs.R}/bin/R"
@ -45,11 +45,17 @@ let
STOP STOP
cat > $out/spark-defaults.conf <<- STOP cat > $out/spark-defaults.conf <<- STOP
spark.eventLog.enabled true spark.eventLog.enabled true
spark.eventLog.dir hdfs://localhost:/logs/spark spark.eventLog.dir hdfs://localhost:/logs/spark
spark.history.fs.logDirectory hdfs://localhost:/logs/spark spark.history.fs.logDirectory hdfs://localhost:/logs/spark
# spark.yarn.keytab ${keytab_path} # spark.yarn.keytab ${keytab_path}
# spark.yarn.principal spark/my.engine@MY.ENGINE # spark.yarn.principal spark/my.engine@MY.ENGINE
spark.history.ui.acls.enable true
spark.history.kerberos.enabled true
spark.history.kerberos.keytab ${keytab_path}
spark.history.kerberos.principal spark/my.engine@MY.ENGINE
spark.yarn.appMasterEnv.PYSPARK_PYTHON ${pysparkEnv.outPath}/bin/${pysparkEnv.executable}
spark.yarn.appMasterEnv.PYTHONPATH ${pysparkEnv.outPath}/lib/${pysparkEnv.executable}/site-packages
STOP STOP
''; '';
}; };
@ -174,7 +180,7 @@ in
namenode = { enable = true; formatOnInit = true; restartIfChanged = true; }; namenode = { enable = true; formatOnInit = true; restartIfChanged = true; };
datanode = { enable = true; restartIfChanged = true; }; datanode = { enable = true; restartIfChanged = true; };
journalnode = { enable = true; restartIfChanged = true; }; journalnode = { enable = true; restartIfChanged = true; };
zkfc = { enable = true; restartIfChanged = true; }; zkfc = { enable = false; restartIfChanged = true; }; # zkfc disabled: not using a High Availability setup
httpfs = { enable = true; restartIfChanged = true; }; httpfs = { enable = true; restartIfChanged = true; };
}; };
yarn = { yarn = {