Big data config for laptop

2022-02-21 14:37:23 +01:00 · 2022-02-21 14:37:23 +01:00 · 9e4879f7a0
commit 9e4879f7a0
parent 72df315b4d
10 changed files with 119 additions and 499 deletions
--- a/nixos/base.nix
+++ b/nixos/base.nix
@ -1,4 +1,46 @@
-{ config, pkgs, lib, ... }: {
+{ config, pkgs, lib, ... }:
+
+let sparkConfDir = pkgs.stdenv.mkDerivation {
+  name = "spark-config";
+  dontUnpack = true;
+  installPhase = ''
+    # source standard environment
+    . $stdenv/setup
+
+    # shorthands
+    base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/
+
+    # create output dirs for new derivation
+    mkdir -p $out/
+
+    # link unchanged files from the original gnome-session
+    for f in $base_conf/*.template ; do
+       ln -sf $f $out/
+    done
+
+    # change selected files
+    cp $out/log4j.properties{.template,}
+
+    cat > $out/spark-env.sh <<- STOP
+    export JAVA_HOME="${pkgs.jdk8}"
+    export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
+    export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
+    export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
+    export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
+    export SPARKR_R_SHELL="${pkgs.R}/bin/R"
+    export PATH="\$PATH:${pkgs.R}/bin"
+    STOP
+
+    cat > $out/spark-defaults.conf <<- STOP
+    spark.eventLog.enabled                  true
+    spark.eventLog.dir                      hdfs://localhost:/logs/spark
+    spark.history.fs.logDirectory           hdfs://localhost:/logs/spark
+    STOP
+  '';
+};
+in
+
+{

  imports = [
    <nixos-hardware/common/cpu/intel>
@ -210,6 +252,82 @@
  services.power-profiles-daemon.enable = true;

  # services.teamviewer.enable = true;
+  services = {
+
+    spark = {
+      master = {
+        enable = true;
+        restartIfChanged = true;
+      };
+      worker = {
+        enable = true;
+        restartIfChanged = true;
+      };
+      confDir = sparkConfDir;
+    };
+
+    hadoop = {
+      coreSite = {
+        "fs.defaultFS" = "hdfs://localhost:8020";
+      };
+      hdfsSite = {
+        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+        "dfs.permissions" = "false";
+      };
+
+      hdfs = {
+        namenode = {
+          enable = true;
+          formatOnInit = true;
+          restartIfChanged = true;
+        };
+        datanode = {
+          enable = true;
+          restartIfChanged = true;
+        };
+        journalnode = {
+          enable = true;
+          restartIfChanged = true;
+        };
+        zkfc = {
+          enable = true;
+          restartIfChanged = true;
+        };
+        httpfs = {
+          enable = true;
+          restartIfChanged = true;
+        };
+      };
+      yarn = {
+        resourcemanager.enable = true;
+        nodemanager.enable = true;
+      };
+    };
+  };
+
+  systemd.services.spark-history = {
+    path = with pkgs; [ procps openssh nettools ];
+    description = "spark history service.";
+    after = [ "network.target" ];
+    wantedBy = [ "multi-user.target" ];
+    restartIfChanged = true;
+    environment = {
+      SPARK_CONF_DIR = sparkConfDir;
+      SPARK_LOG_DIR = "/var/log/spark";
+    };
+    serviceConfig = {
+      Type = "forking";
+      User = "spark";
+      Group = "spark";
+      WorkingDirectory = "${pkgs.spark}/lib/${pkgs.spark.untarDir}";
+      ExecStart = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh";
+      ExecStop = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/stop-history-server.sh";
+      TimeoutSec = 300;
+      StartLimitBurst = 10;
+      Restart = "always";
+    };
+  };
+

  # Virtualisation
  virtualisation = {