Update big data config

This commit is contained in:
Filippo Berto 2022-03-29 09:47:57 +02:00
parent a27dbfd681
commit 86a6fbac76

View file

@ -1,11 +1,16 @@
{ config, lib, pkgs, ... }: { config, lib, pkgs, ... }:
let let
keytab_path = /etc/hadoop.keytab; keytab_path = /etc/hadoop.keytab;
pysparkPackageSelector = p: with p; [ numpy pyspark ];
pysparkEnv = pkgs.python3.withPackages pysparkPackageSelector;
hadoopConf = import <nixos/nixos/modules/services/cluster/hadoop/conf.nix> { hadoopConf = import <nixos/nixos/modules/services/cluster/hadoop/conf.nix> {
inherit pkgs lib; inherit pkgs lib;
cfg = config.services.hadoop; cfg = config.services.hadoop;
}; };
hadoopConfDir = "${hadoopConf}/"; hadoopConfDir = "${hadoopConf}/";
spark = pkgs.spark.override {
extraPythonPackages = pysparkPackageSelector pkgs.python3.pkgs;
};
sparkConfDir = pkgs.stdenv.mkDerivation { sparkConfDir = pkgs.stdenv.mkDerivation {
name = "spark-config"; name = "spark-config";
dontUnpack = true; dontUnpack = true;
@ -31,7 +36,8 @@ let
export JAVA_HOME="${pkgs.jdk8}" export JAVA_HOME="${pkgs.jdk8}"
# SPARK_HOME follows the overridden spark package that services.spark.package is set to.
export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}" export SPARK_HOME="${spark}/lib/${spark.untarDir}"
export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath) export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
# PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON each name one interpreter binary, so the value carries no trailing colon.
export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}" export PYSPARK_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYSPARK_DRIVER_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH" export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
export HADOOP_CONF_DIR="${hadoopConfDir}" export HADOOP_CONF_DIR="${hadoopConfDir}"
export SPARKR_R_SHELL="${pkgs.R}/bin/R" export SPARKR_R_SHELL="${pkgs.R}/bin/R"
@ -42,8 +48,8 @@ let
spark.eventLog.enabled true spark.eventLog.enabled true
spark.eventLog.dir hdfs://localhost:/logs/spark spark.eventLog.dir hdfs://localhost:/logs/spark
spark.history.fs.logDirectory hdfs://localhost:/logs/spark spark.history.fs.logDirectory hdfs://localhost:/logs/spark
# YARN Kerberos login is commented out; the keytab path is rendered with toString so the keytab file is not copied into the Nix store.
spark.yarn.keytab ${keytab_path} # spark.yarn.keytab ${toString keytab_path}
spark.yarn.principal spark/my.engine@MY.ENGINE # spark.yarn.principal spark/my.engine@MY.ENGINE
STOP STOP
''; '';
}; };
@ -63,19 +69,12 @@ in
services = { services = {
spark = { spark = {
master = { package = spark;
enable = true; master = { enable = true; restartIfChanged = true; };
restartIfChanged = true; worker = { enable = true; restartIfChanged = true; };
};
worker = {
enable = true;
restartIfChanged = true;
};
confDir = sparkConfDir; confDir = sparkConfDir;
}; };
hadoop = { hadoop = {
coreSite = { coreSite = {
"fs.defaultFS" = "hdfs://my.engine:8020"; "fs.defaultFS" = "hdfs://my.engine:8020";
@ -188,11 +187,7 @@ in
enable = true; enable = true;
realms."MY.ENGINE".acl = [ realms."MY.ENGINE".acl = [
{ principal = "*/admin"; access = "all"; } { principal = "*/admin"; access = "all"; }
{ principal = "admin"; access = "all"; }
{ principal = "*/localhost"; access = "all"; }
{ principal = "*/my.engine"; access = "all"; } { principal = "*/my.engine"; access = "all"; }
{ principal = "nn/my.engine"; access = "all"; }
{ principal = "hdfs"; access = "all"; }
]; ];
}; };
}; };