Centralized big_data config

This commit is contained in:
Filippo Berto 2022-03-21 09:50:26 +01:00
parent 5a0f9cc5b4
commit 1680b2bc45
3 changed files with 122 additions and 211 deletions

120
nixos/big_data.nix Normal file
View file

@@ -0,0 +1,120 @@
# NixOS module centralizing the "big data" stack: Spark master/worker,
# Hadoop HDFS/YARN daemons, and a custom Spark history-server systemd unit.
{ config, lib, pkgs, ... }:
# Build a Spark configuration directory as its own derivation: symlink the
# stock *.template files from the packaged Spark, then materialize concrete
# log4j.properties, spark-env.sh and spark-defaults.conf on top.
let sparkConfDir = pkgs.stdenv.mkDerivation {
name = "spark-config";
# No source archive — this derivation only assembles files in installPhase.
dontUnpack = true;
installPhase = ''
# source standard environment
. $stdenv/setup
# shorthands
base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/
# create output dirs for new derivation
mkdir -p $out/
# link unchanged files from the packaged Spark conf dir
for f in $base_conf/*.template ; do
ln -sf $f $out/
done
# change selected files
# activate the default log4j config by dropping the .template suffix
cp $out/log4j.properties{.template,}
# Nix ''${pkgs.…} interpolations are resolved at build time; \$VAR escapes
# survive into the generated script and expand at Spark runtime.
# NOTE(review): PYTHONPATH mixes escaped \$PYTHONPATH (runtime) with
# unescaped $PYTHONPATH (expanded during the build, likely empty) —
# confirm the build-time expansion is intentional.
cat > $out/spark-env.sh <<- STOP
export JAVA_HOME="${pkgs.jdk8}"
export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
export SPARKR_R_SHELL="${pkgs.R}/bin/R"
export PATH="\$PATH:${pkgs.R}/bin"
STOP
# Event logging to HDFS so the history server (unit below) can read it.
# NOTE(review): "hdfs://localhost:" has an empty port — presumably falls
# back to fs.defaultFS resolution; verify against the Hadoop client.
cat > $out/spark-defaults.conf <<- STOP
spark.eventLog.enabled true
spark.eventLog.dir hdfs://localhost:/logs/spark
spark.history.fs.logDirectory hdfs://localhost:/logs/spark
STOP
'';
};
in
{
services = {
# Single-node Spark: master and worker on this host, sharing the
# generated configuration directory built above.
spark = {
master = {
enable = true;
restartIfChanged = true;
};
worker = {
enable = true;
restartIfChanged = true;
};
confDir = sparkConfDir;
};
# Pseudo-distributed Hadoop on localhost: all HDFS daemons plus YARN.
hadoop = {
coreSite = {
"fs.defaultFS" = "hdfs://localhost:8020";
};
hdfsSite = {
# Listen on all interfaces; permissions checks disabled
# (single-user setup — values are Hadoop XML strings, hence "false").
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
"dfs.permissions" = "false";
"dfs.namenode.name.dir" = "/hdfs/dfs/name";
"dfs.datanode.data.dir" = "/hdfs/dfs/data";
};
hdfs = {
namenode = {
enable = true;
# Format the filesystem on first start only.
formatOnInit = true;
restartIfChanged = true;
};
datanode = {
enable = true;
restartIfChanged = true;
};
journalnode = {
enable = true;
restartIfChanged = true;
};
zkfc = {
enable = true;
restartIfChanged = true;
};
httpfs = {
enable = true;
restartIfChanged = true;
};
};
yarn = {
resourcemanager.enable = true;
nodemanager.enable = true;
};
};
};
# Custom unit for the Spark history server, pointed at the same generated
# conf dir so it reads the spark-defaults.conf event-log location.
systemd.services.spark-history = {
path = with pkgs; [ procps openssh nettools ];
description = "spark history service.";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
restartIfChanged = true;
environment = {
SPARK_CONF_DIR = sparkConfDir;
SPARK_LOG_DIR = "/var/log/spark";
};
serviceConfig = {
# start-history-server.sh daemonizes, hence Type = "forking".
Type = "forking";
User = "spark";
Group = "spark";
WorkingDirectory = "${pkgs.spark}/lib/${pkgs.spark.untarDir}";
ExecStart = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh";
ExecStop = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/stop-history-server.sh";
TimeoutSec = 300;
StartLimitBurst = 10;
Restart = "always";
};
};
}