diff --git a/nixos/base.nix b/nixos/base.nix
index 3240307..0b5ded8 100644
--- a/nixos/base.nix
+++ b/nixos/base.nix
@@ -1,6 +1,5 @@
 { config, pkgs, lib, ... }:
 {
-  imports = [
@@ -171,7 +170,7 @@
     };
   };
-  services.dbus.packages = with pkgs; [ gnome.dconf ];
+  services.dbus.packages = with pkgs; [ dconf ];
   services.gnome.gnome-keyring.enable = true;
   hardware.bluetooth.enable = true;
   # services.blueman.enable = true;
diff --git a/nixos/big_data.nix b/nixos/big_data.nix
index d4d161e..3efcad5 100644
--- a/nixos/big_data.nix
+++ b/nixos/big_data.nix
@@ -1,42 +1,52 @@
 { config, lib, pkgs, ... }:
-let sparkConfDir = pkgs.stdenv.mkDerivation {
-  name = "spark-config";
-  dontUnpack = true;
-  installPhase = ''
-    # source standard environment
-    . $stdenv/setup
+let
+  keytab_path = "/etc/hadoop.keytab"; # a string, not a path literal, so the keytab is never copied into the Nix store
+  hadoopConf = import <nixpkgs/nixos/modules/services/cluster/hadoop/conf.nix> {
+    inherit pkgs lib;
+    cfg = config.services.hadoop;
+  };
+  hadoopConfDir = "${hadoopConf}/";
+  sparkConfDir = pkgs.stdenv.mkDerivation {
+    name = "spark-config";
+    dontUnpack = true;
+    installPhase = ''
+      # source standard environment
+      . $stdenv/setup

-    # shorthands
-    base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/
+      # shorthands
+      base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/

-    # create output dirs for new derivation
-    mkdir -p $out/
+      # create output dirs for new derivation
+      mkdir -p $out/

-    # link unchanged files from the original gnome-session
-    for f in $base_conf/*.template ; do
-      ln -sf $f $out/
-    done
+      # link unchanged files from the original Spark conf dir
+      for f in $base_conf/*.template ; do
+        ln -sf $f $out/
+      done

-    # change selected files
-    cp $out/log4j.properties{.template,}
+      # change selected files
+      cp $out/log4j.properties{.template,}

-    cat > $out/spark-env.sh <<- STOP
-    export JAVA_HOME="${pkgs.jdk8}"
-    export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
-    export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
-    export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
-    export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
-    export SPARKR_R_SHELL="${pkgs.R}/bin/R"
-    export PATH="\$PATH:${pkgs.R}/bin"
-    STOP
+      cat > $out/spark-env.sh <<- STOP
+      export JAVA_HOME="${pkgs.jdk8}"
+      export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
+      export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
+      export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
+      export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
+      export HADOOP_CONF_DIR="${hadoopConfDir}"
+      export SPARKR_R_SHELL="${pkgs.R}/bin/R"
+      export PATH="\$PATH:${pkgs.R}/bin"
+      STOP

-    cat > $out/spark-defaults.conf <<- STOP
-    spark.eventLog.enabled true
-    spark.eventLog.dir hdfs://localhost:/logs/spark
-    spark.history.fs.logDirectory hdfs://localhost:/logs/spark
-    STOP
-  '';
-};
+      cat > $out/spark-defaults.conf <<- STOP
+      spark.eventLog.enabled true
+      spark.eventLog.dir hdfs://localhost:/logs/spark
+      spark.history.fs.logDirectory hdfs://localhost:/logs/spark
+      spark.yarn.keytab ${keytab_path}
+      spark.yarn.principal spark/my.engine@MY.ENGINE
+      STOP
+    '';
+  };
 in
 {
@@ -66,132 +76,113 @@ in
-    hadoop =
-      let
-        keytab_path = /etc/hadoop.keytab;
-      in
+    hadoop = {
+      coreSite = {
+        "fs.defaultFS" = "hdfs://my.engine:8020";

-      {
-        coreSite = {
-          # "fs.defaultFS" = "hdfs://0.0.0.0:8020";
+        # HDFS IMPERSONATION
+        "hadoop.proxyuser.hdfs.hosts" = "*";
+        "hadoop.proxyuser.hdfs.groups" = "*";

-          # "hadoop.http.authentication.simple.anonymous.allowed" = "false";
"hadoop.http.authentication.signature.secret.file" = "/var/lib/hadoop/security/http_secret"; - # "hadoop.http.authentication.type" = "kerberos"; - # "hadoop.http.authentication.kerberos.principal" = "http/my.engine@MY.ENGINE"; - # "hadoop.http.authentication.cookie.domain" = "my.engine"; + # HIVE IMPERSONATION + "hadoop.proxyuser.hive.hosts" = "*"; + "hadoop.proxyuser.hive.groups" = "*"; - # "hadoop.security.authentication" = "kerberos"; - # "hadoop.security.authorization" = "true"; - # "hadoop.rpc.protection" = "authentication"; + # ENABLE AUTHENTICATION + "hadoop.security.authentication" = "kerberos"; + "hadoop.security.authorization" = "true"; + "hadoop.rpc.protection" = "privacy"; - - # "hadoop.rpc.protection" = "authentication"; - # "hadoop.security.auth_to_local" = '' - # RULE:[2:$1/$2@$0]([ndj]n/.*@MY.ENGINE)s/.*/hdfs/ - # RULE:[2:$1/$2@$0]([rn]m/.*@MY.ENGINE)s/.*/yarn/ - # RULE:[2:$1/$2@$0](jhs/.*@MY.ENGINE)s/.*/mapred/ - # DEFAULT - # ''; - # "hadoop.proxyuser.superuser.hosts" = "*"; # TODO: restrict - # "hadoop.proxyuser.superuser.groups" = "*"; # TODO: restrict - - "fs.defaultFS" = "hdfs://my.engine:8020"; - - # HDFS IMPERSONATION - "hadoop.proxyuser.hdfs.hosts" = "*"; - "hadoop.proxyuser.hdfs.groups" = "*"; - - # HIVE IMPERSONATION - "hadoop.proxyuser.hive.hosts" = "*"; - "hadoop.proxyuser.hive.groups" = "*"; - - # ENABLE AUTHENTICATION - "hadoop.security.authentication" = "kerberos"; - "hadoop.security.authorization" = "true"; - "hadoop.rpc.protection" = "privacy"; - - "hadoop.security.auth_to_local" = '' - RULE:[2:$1/$2@$0]([ndj]n/.*@MY\.ENGINE)s/.*/hdfs/ - RULE:[2:$1/$2@$0]([rn]m/.*@MY\.ENGINE)s/.*/yarn/ - RULE:[2:$1/$2@$0](jhs/.*@MY\.ENGINE)s/.*/mapred/ - DEFAULT - ''; - }; - hdfsSite = { - # DATA - "dfs.namenode.name.dir" = "/hdfs/dfs/name"; - "dfs.datanode.data.dir" = "/hdfs/dfs/data"; - - # HDFS SECURITY - "dfs.block.access.token.enable" = "true"; - - # NAME NODE SECURITY - "dfs.namenode.keytab.file" = keytab_path; - "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE"; - "dfs.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE"; - - # SECONDARY NAME NODE SECURITY - "dfs.secondary.namenode.keytab.file" = keytab_path; - "dfs.secondary.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE"; - "dfs.secondary.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE"; - - # DATA NODE SECURITY - "dfs.datanode.keytab.file" = keytab_path; - "dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE"; - - # WEBHDFS SECURITY - "dfs.webhdfs.enabled" = "true"; - - # WEB AUTHENTICATION CONFIG - "dfs.web.authentication.kerberos.principal" = "HTTP/my.engine@MY.ENGINE"; - "dfs.web.authentication.kerberos.keytab" = keytab_path; - "ignore.secure.ports.for.testing" = "true"; - "dfs.http.policy" = "HTTP_ONLY"; - "dfs.data.transfer.protection" = "privacy"; - - # ## MULTIHOMED - # "dfs.namenode.rpc-bind-host" = "0.0.0.0"; - # "dfs.namenode.servicerpc-bind-host" = "0.0.0.0"; - # "dfs.namenode.http-bind-host" = "0.0.0.0"; - # "dfs.namenode.https-bind-host" = "0.0.0.0"; - # "dfs.client.use.datanode.hostname" = "true"; # force connection by hostname - # "dfs.datanode.use.datanode.hostname" = "true"; # force connection by hostname - - - # "dfs.data.transfer.protection" = "privacy"; - # "hadoop.rpc.protection" = "privacy"; - # "dfs.http.policy" = "HTTP_ONLY"; - # "dfs.datanode.address" = "0.0.0.0:10019"; - # "dfs.datanode.http.address" = "0.0.0.0:10022"; - # "dfs.datanode.https.address" = "0.0.0.0:10023"; - - - # "dfs.datanode.kerberos.principal" = 
"dn/my.engine@MY.ENGINE"; - # "dfs.datanode.keytab.file" = keytab_path; - - # "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE"; - # "dfs.namenode.keytab.file" = keytab_path; - - # "dfs.block.access.token.enable" = "true"; - - - }; - yarnSite = { - # "yarn.acl.enable" = "true"; - # "yarn.admin.acl" = "*"; # TODO: restrict - }; - extraConfDirs = [ ]; - - hdfs = { - namenode = { enable = true; formatOnInit = true; restartIfChanged = true; }; - datanode = { enable = true; restartIfChanged = true; }; - journalnode = { enable = true; restartIfChanged = true; }; - zkfc = { enable = true; restartIfChanged = true; }; - httpfs = { enable = true; restartIfChanged = true; }; - }; - yarn = { resourcemanager.enable = true; nodemanager.enable = true; }; + "hadoop.security.auth_to_local" = '' + RULE:[2:$1/$2@$0]([ndj]n/.*@MY\.ENGINE)s/.*/hdfs/ + RULE:[2:$1/$2@$0]([rn]m/.*@MY\.ENGINE)s/.*/yarn/ + RULE:[2:$1/$2@$0](jhs/.*@MY\.ENGINE)s/.*/mapred/ + DEFAULT + ''; }; + hdfsSite = { + # DATA + "dfs.namenode.name.dir" = "/hdfs/dfs/name"; + "dfs.datanode.data.dir" = "/hdfs/dfs/data"; + + # HDFS SECURITY + "dfs.block.access.token.enable" = "true"; + + # NAME NODE SECURITY + "dfs.namenode.keytab.file" = keytab_path; + "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE"; + "dfs.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE"; + + # SECONDARY NAME NODE SECURITY + "dfs.secondary.namenode.keytab.file" = keytab_path; + "dfs.secondary.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE"; + "dfs.secondary.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE"; + + # DATA NODE SECURITY + "dfs.datanode.keytab.file" = keytab_path; + "dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE"; + + # JOURNAL NODE SECURITY + "dfs.journalnode.keytab.file" = keytab_path; + "dfs.journalnode.kerberos.principal" = "jn/my.engine@MY.ENGINE"; + + # WEBHDFS SECURITY + "dfs.webhdfs.enabled" = "true"; + + # WEB AUTHENTICATION CONFIG + "dfs.web.authentication.kerberos.principal" = "HTTP/my.engine@MY.ENGINE"; + "dfs.web.authentication.kerberos.keytab" = keytab_path; + "ignore.secure.ports.for.testing" = "true"; + "dfs.http.policy" = "HTTP_ONLY"; + "dfs.data.transfer.protection" = "privacy"; + + # ## MULTIHOMED + # "dfs.namenode.rpc-bind-host" = "0.0.0.0"; + # "dfs.namenode.servicerpc-bind-host" = "0.0.0.0"; + # "dfs.namenode.http-bind-host" = "0.0.0.0"; + # "dfs.namenode.https-bind-host" = "0.0.0.0"; + # "dfs.client.use.datanode.hostname" = "true"; # force connection by hostname + # "dfs.datanode.use.datanode.hostname" = "true"; # force connection by hostname + }; + yarnSite = { + "yarn.nodemanager.admin-env" = "PATH=$PATH"; + "yarn.nodemanager.aux-services" = "mapreduce_shuffle"; + "yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler"; + "yarn.nodemanager.bind-host" = "0.0.0.0"; + "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor"; + "yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ"; + "yarn.nodemanager.linux-container-executor.group" = "hadoop"; + "yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor"; + "yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager"; + "yarn.resourcemanager.bind-host" = "0.0.0.0"; + "yarn.resourcemanager.scheduler.class" = 
"org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler"; + + "yarn.resourcemanager.keytab" = keytab_path; + "yarn.resourcemanager.principal" = "rm/my.engine@MY.ENGINE"; + "yarn.nodemanager.keytab" = keytab_path; + "yarn.nodemanager.principal" = "nm/my.engine@MY.ENGINE"; + + # "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor"; + + "yarn.scheduler.capacity.root.queues" = "default"; + "yarn.scheduler.capacity.root.default.capacity" = 100; + # "yarn.scheduler.capacity.root.default.state" = "RUNNING"; + "yarn.scheduler.capacity.root.acl_submit_applications" = "hadoop,yarn,mapred,hdfs"; + }; + extraConfDirs = [ ]; + + hdfs = { + namenode = { enable = true; formatOnInit = true; restartIfChanged = true; }; + datanode = { enable = true; restartIfChanged = true; }; + journalnode = { enable = true; restartIfChanged = true; }; + zkfc = { enable = true; restartIfChanged = true; }; + httpfs = { enable = true; restartIfChanged = true; }; + }; + yarn = { + resourcemanager = { enable = true; restartIfChanged = true; }; + nodemanager = { enable = true; restartIfChanged = true; }; + }; + }; kerberos_server = { enable = true; @@ -236,8 +227,7 @@ in ''; }; - - + users.users.bertof.extraGroups = [ "hadoop" ]; systemd.services.spark-history = { path = with pkgs; [ procps openssh nettools ];