Update big data config

This commit is contained in:
Filippo Berto 2022-03-29 09:47:57 +02:00
parent a27dbfd681
commit 86a6fbac76

View file

@ -1,11 +1,16 @@
{ config, lib, pkgs, ... }:
let
# Location of the Kerberos keytab on the target machine.
# Kept as a string, not a path literal: interpolating a Nix path into a string
# copies the file into the world-readable Nix store and fails evaluation when
# the file does not exist on the evaluating host.
keytab_path = "/etc/hadoop.keytab";
# Given a Python package set, returns the list of packages selected for PySpark.
pysparkPackageSelector = pythonPkgs: [ pythonPkgs.numpy pythonPkgs.pyspark ];
# Python 3 environment built from the packages chosen by pysparkPackageSelector.
pysparkEnv = pkgs.python3.withPackages pysparkPackageSelector;
# Evaluates the hadoop module's conf.nix with this system's hadoop settings.
# The angle-bracket path is looked up through the `nixos` entry of the Nix
# search path, so that entry has to be present at evaluation time.
hadoopConf =
  let
    confExpr = <nixos/nixos/modules/services/cluster/hadoop/conf.nix>;
    hadoopCfg = config.services.hadoop;
  in
  import confExpr { inherit lib pkgs; cfg = hadoopCfg; };
# String form of the result above with a trailing slash appended.
hadoopConfDir = "${hadoopConf}/";
# Spark package overridden so that it receives the PySpark package selection
# (taken from the python3 package set) as its extra Python packages.
spark =
  let
    extraPythonPackages = pysparkPackageSelector pkgs.python3.pkgs;
  in
  pkgs.spark.override { inherit extraPythonPackages; };
sparkConfDir = pkgs.stdenv.mkDerivation {
name = "spark-config";
dontUnpack = true;
@ -31,7 +36,8 @@ let
export JAVA_HOME="${pkgs.jdk8}"
export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
# Interpreter used by PySpark executors and by the driver. Each value is a
# plain executable path, so it must not end with a path-list separator.
export PYSPARK_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYSPARK_DRIVER_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
export HADOOP_CONF_DIR="${hadoopConfDir}"
export SPARKR_R_SHELL="${pkgs.R}/bin/R"
@ -42,8 +48,8 @@ let
spark.eventLog.enabled true
spark.eventLog.dir hdfs://localhost:/logs/spark
spark.history.fs.logDirectory hdfs://localhost:/logs/spark
spark.yarn.keytab ${keytab_path}
spark.yarn.principal spark/my.engine@MY.ENGINE
# spark.yarn.keytab ${keytab_path}
# spark.yarn.principal spark/my.engine@MY.ENGINE
STOP
'';
};
@ -63,19 +69,12 @@ in
services = {
spark = {
master = {
enable = true;
restartIfChanged = true;
};
worker = {
enable = true;
restartIfChanged = true;
};
package = spark;
master = { enable = true; restartIfChanged = true; };
worker = { enable = true; restartIfChanged = true; };
confDir = sparkConfDir;
};
hadoop = {
coreSite = {
"fs.defaultFS" = "hdfs://my.engine:8020";
@ -188,11 +187,7 @@ in
enable = true;
realms."MY.ENGINE".acl = [
{ principal = "*/admin"; access = "all"; }
{ principal = "admin"; access = "all"; }
{ principal = "*/localhost"; access = "all"; }
{ principal = "*/my.engine"; access = "all"; }
{ principal = "nn/my.engine"; access = "all"; }
{ principal = "hdfs"; access = "all"; }
];
};
};