# Patch to nixos/big_data.nix.
#
# What it does:
#   * defines pysparkPackageSelector (numpy + pyspark) and builds pysparkEnv
#     from it with pkgs.python3.withPackages;
#   * overrides pkgs.spark with the same package list (extraPythonPackages) and
#     sets it as services.spark.package;
#   * points PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON at the pysparkEnv
#     interpreter;
#   * comments out the spark.yarn.keytab / spark.yarn.principal settings;
#   * collapses the master/worker attribute sets onto single lines;
#   * reduces realms."MY.ENGINE".acl to the */admin and */my.engine principals.
#
# Fix applied in this revision: the two PYSPARK_* exports no longer end in a
# stray ":" after the interpreter path; with the colon the value names a file
# that does not exist.
#
# Line breaks and indentation were reconstructed from a flattened copy of the
# patch. Hunk line counts match the @@ headers, but indentation and the
# position of blank context lines are a best guess -- regenerate with
# `git diff` before applying.
#
# NOTE(review): the `hadoopConf = import {` context line has no path argument
#   in this copy; presumably an angle-bracket path was stripped -- confirm
#   against the repository.
# NOTE(review): SPARK_HOME still references pkgs.spark while
#   services.spark.package is now the overridden `spark` -- confirm that this
#   is intended.
# NOTE(review): `${keytab_path}` on the commented-out keytab line is still a
#   Nix antiquotation (the `#` is just text inside the string), and
#   keytab_path is a path literal, so the keytab is presumably still imported
#   into the store -- confirm, and escape it as ''${...} if that is unwanted.
diff --git a/nixos/big_data.nix b/nixos/big_data.nix
index 3efcad5..7566112 100644
--- a/nixos/big_data.nix
+++ b/nixos/big_data.nix
@@ -1,11 +1,16 @@
 { config, lib, pkgs, ... }:
 let
   keytab_path = /etc/hadoop.keytab;
+  pysparkPackageSelector = p: with p; [ numpy pyspark ];
+  pysparkEnv = pkgs.python3.withPackages pysparkPackageSelector;
   hadoopConf = import {
     inherit pkgs lib;
     cfg = config.services.hadoop;
   };
   hadoopConfDir = "${hadoopConf}/";
+  spark = pkgs.spark.override {
+    extraPythonPackages = pysparkPackageSelector pkgs.python3.pkgs;
+  };
   sparkConfDir = pkgs.stdenv.mkDerivation {
     name = "spark-config";
     dontUnpack = true;
@@ -31,7 +36,8 @@ let
       export JAVA_HOME="${pkgs.jdk8}"
       export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
       export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
-      export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
+      export PYSPARK_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
+      export PYSPARK_DRIVER_PYTHON="${pysparkEnv.outPath}/bin/${pysparkEnv.executable}"
       export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
       export HADOOP_CONF_DIR="${hadoopConfDir}"
       export SPARKR_R_SHELL="${pkgs.R}/bin/R"
@@ -42,8 +48,8 @@ let
       spark.eventLog.enabled true
       spark.eventLog.dir hdfs://localhost:/logs/spark
       spark.history.fs.logDirectory hdfs://localhost:/logs/spark
-      spark.yarn.keytab ${keytab_path}
-      spark.yarn.principal spark/my.engine@MY.ENGINE
+      # spark.yarn.keytab ${keytab_path}
+      # spark.yarn.principal spark/my.engine@MY.ENGINE
       STOP
     '';
   };
@@ -63,19 +69,12 @@ in
 
   services = {
     spark = {
-      master = {
-        enable = true;
-        restartIfChanged = true;
-      };
-      worker = {
-        enable = true;
-        restartIfChanged = true;
-      };
+      package = spark;
+      master = { enable = true; restartIfChanged = true; };
+      worker = { enable = true; restartIfChanged = true; };
       confDir = sparkConfDir;
     };
 
-
-
     hadoop = {
       coreSite = {
         "fs.defaultFS" = "hdfs://my.engine:8020";
@@ -188,11 +187,7 @@ in
       enable = true;
       realms."MY.ENGINE".acl = [
         { principal = "*/admin"; access = "all"; }
-        { principal = "admin"; access = "all"; }
-        { principal = "*/localhost"; access = "all"; }
         { principal = "*/my.engine"; access = "all"; }
-        { principal = "nn/my.engine"; access = "all"; }
-        { principal = "hdfs"; access = "all"; }
       ];
     };
   };