{ config, lib, pkgs, ... }:

let
  # Custom Spark configuration directory, built as its own derivation:
  # the stock *.template files are symlinked in, then log4j.properties,
  # spark-env.sh and spark-defaults.conf are materialised with
  # site-specific settings.
  sparkConfDir = pkgs.stdenv.mkDerivation {
    name = "spark-config";
    dontUnpack = true;
    installPhase = ''
      # source standard environment
      . $stdenv/setup

      # shorthands
      base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/

      # create output dirs for new derivation
      mkdir -p $out/

      # link unchanged files from the stock Spark conf directory
      for f in $base_conf/*.template ; do
        ln -sf $f $out/
      done

      # change selected files: activate the default log4j template
      cp $out/log4j.properties{.template,}

      # NOTE(review): in this heredoc only the \$-escaped variables survive
      # into the generated file; unescaped $(...) and $PYTHONPATH are
      # expanded at *build* time and baked in — this appears intentional
      # (hadoop classpath and the build-time PYTHONPATH are static), but
      # confirm before changing the escaping.
      cat > $out/spark-env.sh <<- STOP
      export JAVA_HOME="${pkgs.jdk8}"
      export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
      export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
      export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
      export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
      export SPARKR_R_SHELL="${pkgs.R}/bin/R"
      export PATH="\$PATH:${pkgs.R}/bin"
      STOP

      # Event-log settings shared by running jobs and the history server.
      # Port 8020 is spelled out to match fs.defaultFS below (the original
      # "hdfs://localhost:/logs/spark" left the port empty).
      cat > $out/spark-defaults.conf <<- STOP
      spark.eventLog.enabled true
      spark.eventLog.dir hdfs://localhost:8020/logs/spark
      spark.history.fs.logDirectory hdfs://localhost:8020/logs/spark
      STOP
    '';
  };
in {
  services = {
    # Standalone Spark master + worker on this host, pointed at the
    # configuration directory built above.
    spark = {
      master = {
        enable = true;
        restartIfChanged = true;
      };
      worker = {
        enable = true;
        restartIfChanged = true;
      };
      confDir = sparkConfDir;
    };

    # Single-node HDFS (plus YARN) used as Spark's event-log backend.
    hadoop = {
      coreSite = {
        "fs.defaultFS" = "hdfs://localhost:8020";
      };
      hdfsSite = {
        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
        # permission checking disabled — acceptable only for a local
        # single-user cluster
        "dfs.permissions" = "false";
        "dfs.namenode.name.dir" = "/hdfs/dfs/name";
        "dfs.datanode.data.dir" = "/hdfs/dfs/data";
      };
      hdfs = {
        namenode = {
          enable = true;
          # format the namenode on first start so the cluster comes up
          # without manual intervention
          formatOnInit = true;
          restartIfChanged = true;
        };
        datanode = {
          enable = true;
          restartIfChanged = true;
        };
        journalnode = {
          enable = true;
          restartIfChanged = true;
        };
        zkfc = {
          enable = true;
          restartIfChanged = true;
        };
        httpfs = {
          enable = true;
          restartIfChanged = true;
        };
      };
      yarn = {
        resourcemanager.enable = true;
        nodemanager.enable = true;
      };
    };
  };

  # The Spark history server has no upstream NixOS module, so it is wired
  # up as a custom systemd unit around the stock start/stop scripts.
  systemd.services.spark-history = {
    path = with pkgs; [ procps openssh nettools ];
    description = "spark history service.";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];
    restartIfChanged = true;
    environment = {
      SPARK_CONF_DIR = sparkConfDir;
      SPARK_LOG_DIR = "/var/log/spark";
    };
    serviceConfig = {
      # the sbin scripts daemonize, hence Type=forking
      Type = "forking";
      User = "spark";
      Group = "spark";
      WorkingDirectory = "${pkgs.spark}/lib/${pkgs.spark.untarDir}";
      ExecStart = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh";
      ExecStop = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/stop-history-server.sh";
      TimeoutSec = 300;
      # NOTE(review): modern systemd expects StartLimitBurst in [Unit]
      # (NixOS unitConfig); it is still parsed here for compatibility —
      # verify against the deployed systemd version.
      StartLimitBurst = 10;
      Restart = "always";
    };
  };
}