From 10c2df06ec13f1d27a201ec69329adb1e8e1d4da Mon Sep 17 00:00:00 2001
From: Filippo Berto
Date: Wed, 30 Mar 2022 16:15:30 +0200
Subject: [PATCH] Update big data

Rework the Kerberos keytab handling of the Hadoop/Spark configuration.

nixos/big_data.nix:
* Replace the path literal `keytab_path = /etc/hadoop.keytab` with the
  string bindings `hadoop_keytab_path` and `spark_keytab_path`. A path
  literal interpolated into a string is imported into the Nix store;
  a plain string keeps the reference to the file in /etc.
* Add `setup_script`, the shell commands that create /hdfs, create the
  nn, dn, jn, rm, nm, jhs and HTTP principals for my.engine and export
  them to /etc/hadoop.keytab (owned by hdfs:hadoop), then create
  spark/my.engine and export it to /etc/spark.keytab (owned by
  spark:spark). No other line added by this patch references the
  binding.
* Point the Spark keytab settings at `spark_keytab_path`: the setup
  script exports the spark/my.engine principal only to
  /etc/spark.keytab, so the Hadoop keytab cannot be used together with
  `spark.history.kerberos.principal spark/my.engine@MY.ENGINE`.
* Set `dfs.journalnode.edits.dir` to /hdfs/dfs/edits.

nixos/thor.nix:
* Remove the `systemd.services.spark-history` unit.

---
 nixos/big_data.nix | 39 +++++++++++++++++++++++++++++----------
 nixos/thor.nix     | 23 -----------------------
 2 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/nixos/big_data.nix b/nixos/big_data.nix
index eef0133..f77009e 100644
--- a/nixos/big_data.nix
+++ b/nixos/big_data.nix
@@ -1,6 +1,24 @@
 { config, lib, pkgs, ... }:
 let
-  keytab_path = /etc/hadoop.keytab;
+  setup_script = ''
+    sudo mkdir -p /hdfs
+    sudo chown -R hdfs:hadoop /hdfs
+
+    for p in {nn,dn,jn,rm,nm,jhs,HTTP}; do
+      sudo kadmin.local -q "ank -randkey $p/my.engine";
+      sudo kadmin.local -q "xst -k /etc/hadoop.keytab $p/my.engine";
+      sudo kadmin.local -q "ktrem -k /etc/hadoop.keytab $p/my.engine old"
+    done
+    sudo chown hdfs:hadoop /etc/hadoop.keytab
+
+
+    sudo kadmin.local -q "ank -randkey spark/my.engine";
+    sudo kadmin.local -q "xst -k /etc/spark.keytab spark/my.engine";
+    sudo kadmin.local -q "ktrem -k /etc/spark.keytab spark/my.engine old"
+    sudo chown spark:spark /etc/spark.keytab
+  '';
+  hadoop_keytab_path = "/etc/hadoop.keytab";
+  spark_keytab_path = "/etc/spark.keytab";
   pysparkPackageSelector = p: with p; [ numpy pyspark ];
   pysparkEnv = pkgs.python3.withPackages pysparkPackageSelector;
   hadoopConf = import {
@@ -48,11 +66,11 @@ let
     spark.eventLog.enabled true
     spark.eventLog.dir hdfs://localhost:/logs/spark
     spark.history.fs.logDirectory hdfs://localhost:/logs/spark
-    # spark.yarn.keytab ${keytab_path}
+    # spark.yarn.keytab ${spark_keytab_path}
     # spark.yarn.principal spark/my.engine@MY.ENGINE
     spark.history.ui.acls.enable true
     spark.history.kerberos.enabled true
-    spark.history.kerberos.keytab ${keytab_path}
+    spark.history.kerberos.keytab ${spark_keytab_path}
     spark.history.kerberos.principal spark/my.engine@MY.ENGINE
     spark.yarn.appMasterEnv.PYSPARK_PYTHON ${pysparkEnv.outPath}/bin/${pysparkEnv.executable}
     spark.yarn.appMasterEnv.PYTHONPATH ${pysparkEnv.outPath}/lib/${pysparkEnv.executable}/site-packages
@@ -109,26 +127,27 @@ in
       # DATA
       "dfs.namenode.name.dir" = "/hdfs/dfs/name";
       "dfs.datanode.data.dir" = "/hdfs/dfs/data";
+      "dfs.journalnode.edits.dir" = "/hdfs/dfs/edits";
 
       # HDFS SECURITY
       "dfs.block.access.token.enable" = "true";
 
       # NAME NODE SECURITY
-      "dfs.namenode.keytab.file" = keytab_path;
+      "dfs.namenode.keytab.file" = hadoop_keytab_path;
       "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
       "dfs.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE";
 
       # SECONDARY NAME NODE SECURITY
-      "dfs.secondary.namenode.keytab.file" = keytab_path;
+      "dfs.secondary.namenode.keytab.file" = hadoop_keytab_path;
       "dfs.secondary.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
       "dfs.secondary.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE";
 
       # DATA NODE SECURITY
-      "dfs.datanode.keytab.file" = keytab_path;
+      "dfs.datanode.keytab.file" = hadoop_keytab_path;
       "dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE";
 
       # JOURNAL NODE SECURITY
-      "dfs.journalnode.keytab.file" = keytab_path;
+      "dfs.journalnode.keytab.file" = hadoop_keytab_path;
       "dfs.journalnode.kerberos.principal" = "jn/my.engine@MY.ENGINE";
 
       # WEBHDFS SECURITY
@@ -136,7 +155,7 @@ in
 
       # WEB AUTHENTICATION CONFIG
       "dfs.web.authentication.kerberos.principal" = "HTTP/my.engine@MY.ENGINE";
-      "dfs.web.authentication.kerberos.keytab" = keytab_path;
+      "dfs.web.authentication.kerberos.keytab" = hadoop_keytab_path;
       "ignore.secure.ports.for.testing" = "true";
       "dfs.http.policy" = "HTTP_ONLY";
       "dfs.data.transfer.protection" = "privacy";
@@ -162,9 +181,9 @@ in
       "yarn.resourcemanager.bind-host" = "0.0.0.0";
       "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
 
-      "yarn.resourcemanager.keytab" = keytab_path;
+      "yarn.resourcemanager.keytab" = hadoop_keytab_path;
       "yarn.resourcemanager.principal" = "rm/my.engine@MY.ENGINE";
-      "yarn.nodemanager.keytab" = keytab_path;
+      "yarn.nodemanager.keytab" = hadoop_keytab_path;
       "yarn.nodemanager.principal" = "nm/my.engine@MY.ENGINE";
 
       # "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
diff --git a/nixos/thor.nix b/nixos/thor.nix
index 7b41f46..e681db1 100644
--- a/nixos/thor.nix
+++ b/nixos/thor.nix
@@ -172,29 +172,6 @@
     # teamviewer.enable = true;
   };
 
-  systemd.services.spark-history = {
-    path = with pkgs; [ procps openssh nettools ];
-    description = "spark history service.";
-    after = [ "network.target" ];
-    wantedBy = [ "multi-user.target" ];
-    restartIfChanged = true;
-    environment = {
-      SPARK_CONF_DIR = sparkConfDir;
-      SPARK_LOG_DIR = "/var/log/spark";
-    };
-    serviceConfig = {
-      Type = "forking";
-      User = "spark";
-      Group = "spark";
-      WorkingDirectory = "${pkgs.spark}/lib/${pkgs.spark.untarDir}";
-      ExecStart = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh";
-      ExecStop = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/stop-history-server.sh";
-      TimeoutSec = 300;
-      StartLimitBurst = 10;
-      Restart = "always";
-    };
-  };
-
   services.teamviewer.enable = true;
 
   security = {