Update big data

Filippo Berto 2022-03-30 16:15:30 +02:00
parent 52047104cc
commit 10c2df06ec
2 changed files with 29 additions and 33 deletions


@@ -1,6 +1,24 @@
 { config, lib, pkgs, ... }:
 let
-  keytab_path = /etc/hadoop.keytab;
+  setup_script = ''
+    sudo mkdir -p /hdfs
+    sudo chown -R hdfs:hadoop /hdfs
+    for p in {nn,dn,jn,rm,nm,jhs,HTTP}; do
+      sudo kadmin.local -q "ank -randkey $p/my.engine";
+      sudo kadmin.local -q "xst -k /etc/hadoop.keytab $p/my.engine";
+      sudo kadmin.local -q "ktrem -k /etc/hadoop.keytab $p/my.engine old"
+    done
+    sudo chown hdfs:hadoop /etc/hadoop.keytab
+    sudo kadmin.local -q "ank -randkey spark/my.engine";
+    sudo kadmin.local -q "xst -k /etc/spark.keytab spark/my.engine";
+    sudo kadmin.local -q "ktrem -k /etc/spark.keytab spark/my.engine old"
+    sudo chown spark:spark /etc/spark.keytab
+  '';
+  hadoop_keytab_path = "/etc/hadoop.keytab";
+  spark_keytab_path = "/etc/spark.keytab";
   pysparkPackageSelector = p: with p; [ numpy pyspark ];
   pysparkEnv = pkgs.python3.withPackages pysparkPackageSelector;
   hadoopConf = import <nixos/nixos/modules/services/cluster/hadoop/conf.nix> {
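
The new setup_script is only defined here as a string; one plausible way to make it runnable is to wrap it as a package on the PATH. A minimal sketch, assuming the module's environment.systemPackages can be extended (the hadoop-kerberos-setup name is hypothetical, not part of this commit):

  environment.systemPackages = [
    # Hypothetical wrapper; the script body is the setup_script string
    # introduced above, used verbatim.
    (pkgs.writeShellScriptBin "hadoop-kerberos-setup" setup_script)
  ];

Run once as an admin, this creates the service principals with kadmin.local and extracts both /etc/hadoop.keytab and /etc/spark.keytab with the ownership the daemons expect.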
@@ -48,11 +66,11 @@ let
   spark.eventLog.enabled true
   spark.eventLog.dir hdfs://localhost:/logs/spark
   spark.history.fs.logDirectory hdfs://localhost:/logs/spark
-  # spark.yarn.keytab ${keytab_path}
+  # spark.yarn.keytab ${hadoop_keytab_path}
   # spark.yarn.principal spark/my.engine@MY.ENGINE
   spark.history.ui.acls.enable true
   spark.history.kerberos.enabled true
-  spark.history.kerberos.keytab ${keytab_path}
+  spark.history.kerberos.keytab ${hadoop_keytab_path}
   spark.history.kerberos.principal spark/my.engine@MY.ENGINE
   spark.yarn.appMasterEnv.PYSPARK_PYTHON ${pysparkEnv.outPath}/bin/${pysparkEnv.executable}
   spark.yarn.appMasterEnv.PYTHONPATH ${pysparkEnv.outPath}/lib/${pysparkEnv.executable}/site-packages
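
Worth noting: the commit introduces spark_keytab_path, and the setup script extracts a dedicated /etc/spark.keytab owned by spark:spark, yet the history-server settings above still read from hadoop_keytab_path. A minimal sketch of how the new binding could be used instead, reusing the spark principal created by the setup script (a suggestion only, not what this commit does):

  spark.history.kerberos.keytab ${spark_keytab_path}
  spark.history.kerberos.principal spark/my.engine@MY.ENGINE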
@@ -109,26 +127,27 @@ in
       # DATA
       "dfs.namenode.name.dir" = "/hdfs/dfs/name";
       "dfs.datanode.data.dir" = "/hdfs/dfs/data";
+      "dfs.journalnode.edits.dir" = "/hdfs/dfs/edits";
       # HDFS SECURITY
       "dfs.block.access.token.enable" = "true";
       # NAME NODE SECURITY
-      "dfs.namenode.keytab.file" = keytab_path;
+      "dfs.namenode.keytab.file" = hadoop_keytab_path;
       "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
       "dfs.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE";
       # SECONDARY NAME NODE SECURITY
-      "dfs.secondary.namenode.keytab.file" = keytab_path;
+      "dfs.secondary.namenode.keytab.file" = hadoop_keytab_path;
       "dfs.secondary.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
       "dfs.secondary.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE";
       # DATA NODE SECURITY
-      "dfs.datanode.keytab.file" = keytab_path;
+      "dfs.datanode.keytab.file" = hadoop_keytab_path;
       "dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE";
       # JOURNAL NODE SECURITY
-      "dfs.journalnode.keytab.file" = keytab_path;
+      "dfs.journalnode.keytab.file" = hadoop_keytab_path;
       "dfs.journalnode.kerberos.principal" = "jn/my.engine@MY.ENGINE";
       # WEBHDFS SECURITY
@@ -136,7 +155,7 @@ in
       # WEB AUTHENTICATION CONFIG
       "dfs.web.authentication.kerberos.principal" = "HTTP/my.engine@MY.ENGINE";
-      "dfs.web.authentication.kerberos.keytab" = keytab_path;
+      "dfs.web.authentication.kerberos.keytab" = hadoop_keytab_path;
       "ignore.secure.ports.for.testing" = "true";
       "dfs.http.policy" = "HTTP_ONLY";
       "dfs.data.transfer.protection" = "privacy";
@@ -162,9 +181,9 @@ in
       "yarn.resourcemanager.bind-host" = "0.0.0.0";
       "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
-      "yarn.resourcemanager.keytab" = keytab_path;
+      "yarn.resourcemanager.keytab" = hadoop_keytab_path;
       "yarn.resourcemanager.principal" = "rm/my.engine@MY.ENGINE";
-      "yarn.nodemanager.keytab" = keytab_path;
+      "yarn.nodemanager.keytab" = hadoop_keytab_path;
       "yarn.nodemanager.principal" = "nm/my.engine@MY.ENGINE";
       # "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";


@@ -172,29 +172,6 @@
     # teamviewer.enable = true;
   };
-  systemd.services.spark-history = {
-    path = with pkgs; [ procps openssh nettools ];
-    description = "spark history service.";
-    after = [ "network.target" ];
-    wantedBy = [ "multi-user.target" ];
-    restartIfChanged = true;
-    environment = {
-      SPARK_CONF_DIR = sparkConfDir;
-      SPARK_LOG_DIR = "/var/log/spark";
-    };
-    serviceConfig = {
-      Type = "forking";
-      User = "spark";
-      Group = "spark";
-      WorkingDirectory = "${pkgs.spark}/lib/${pkgs.spark.untarDir}";
-      ExecStart = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh";
-      ExecStop = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/stop-history-server.sh";
-      TimeoutSec = 300;
-      StartLimitBurst = 10;
-      Restart = "always";
-    };
-  };
   services.teamviewer.enable = true;
   security = {
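
With the spark-history unit gone, the history server would have to be started by hand when needed. A minimal sketch of a convenience alias reusing the paths from the deleted unit, assuming it sits in the same module where sparkConfDir is in scope (environment.shellAliases is a standard NixOS option; the alias name is hypothetical):

  environment.shellAliases = {
    # Starts the Spark history server with the same conf and log dirs
    # the removed systemd unit used.
    spark-history-start =
      "SPARK_CONF_DIR=${sparkConfDir} SPARK_LOG_DIR=/var/log/spark "
      + "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh";
  };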