HDFS + KERBEROS

This commit is contained in:
Filippo Berto 2022-03-24 17:51:24 +01:00
parent 7ad8a748f1
commit a27dbfd681
2 changed files with 146 additions and 157 deletions

View file

@@ -1,6 +1,5 @@
{ config, pkgs, lib, ... }:
{
imports = [
<nixos-hardware/common/cpu/intel>
<nixos-hardware/common/pc/laptop>
@@ -171,7 +170,7 @@
};
};
services.dbus.packages = with pkgs; [ gnome.dconf ];
services.dbus.packages = with pkgs; [ dconf ];
services.gnome.gnome-keyring.enable = true;
hardware.bluetooth.enable = true;
# services.blueman.enable = true;

View file

@@ -1,5 +1,12 @@
{ config, lib, pkgs, ... }:
let sparkConfDir = pkgs.stdenv.mkDerivation {
let
keytab_path = /etc/hadoop.keytab;
hadoopConf = import <nixos/nixos/modules/services/cluster/hadoop/conf.nix> {
inherit pkgs lib;
cfg = config.services.hadoop;
};
hadoopConfDir = "${hadoopConf}/";
sparkConfDir = pkgs.stdenv.mkDerivation {
name = "spark-config";
dontUnpack = true;
installPhase = ''
@@ -26,6 +33,7 @@ let sparkConfDir = pkgs.stdenv.mkDerivation {
export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
export HADOOP_CONF_DIR="${hadoopConfDir}"
export SPARKR_R_SHELL="${pkgs.R}/bin/R"
export PATH="\$PATH:${pkgs.R}/bin"
STOP
@@ -34,9 +42,11 @@ let sparkConfDir = pkgs.stdenv.mkDerivation {
spark.eventLog.enabled true
spark.eventLog.dir hdfs://localhost:/logs/spark
spark.history.fs.logDirectory hdfs://localhost:/logs/spark
spark.yarn.keytab ${keytab_path}
spark.yarn.principal spark/my.engine@MY.ENGINE
STOP
'';
};
};
in
{
@@ -66,36 +76,8 @@ in
hadoop =
let
keytab_path = /etc/hadoop.keytab;
in
{
hadoop = {
coreSite = {
# "fs.defaultFS" = "hdfs://0.0.0.0:8020";
# "hadoop.http.authentication.simple.anonymous.allowed" = "false";
# "hadoop.http.authentication.signature.secret.file" = "/var/lib/hadoop/security/http_secret";
# "hadoop.http.authentication.type" = "kerberos";
# "hadoop.http.authentication.kerberos.principal" = "http/my.engine@MY.ENGINE";
# "hadoop.http.authentication.cookie.domain" = "my.engine";
# "hadoop.security.authentication" = "kerberos";
# "hadoop.security.authorization" = "true";
# "hadoop.rpc.protection" = "authentication";
# "hadoop.rpc.protection" = "authentication";
# "hadoop.security.auth_to_local" = ''
# RULE:[2:$1/$2@$0]([ndj]n/.*@MY.ENGINE)s/.*/hdfs/
# RULE:[2:$1/$2@$0]([rn]m/.*@MY.ENGINE)s/.*/yarn/
# RULE:[2:$1/$2@$0](jhs/.*@MY.ENGINE)s/.*/mapred/
# DEFAULT
# '';
# "hadoop.proxyuser.superuser.hosts" = "*"; # TODO: restrict
# "hadoop.proxyuser.superuser.groups" = "*"; # TODO: restrict
"fs.defaultFS" = "hdfs://my.engine:8020";
# HDFS IMPERSONATION
@@ -140,6 +122,10 @@ in
"dfs.datanode.keytab.file" = keytab_path;
"dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE";
# JOURNAL NODE SECURITY
"dfs.journalnode.keytab.file" = keytab_path;
"dfs.journalnode.kerberos.principal" = "jn/my.engine@MY.ENGINE";
# WEBHDFS SECURITY
"dfs.webhdfs.enabled" = "true";
@@ -157,29 +143,31 @@ in
# "dfs.namenode.https-bind-host" = "0.0.0.0";
# "dfs.client.use.datanode.hostname" = "true"; # force connection by hostname
# "dfs.datanode.use.datanode.hostname" = "true"; # force connection by hostname
# "dfs.data.transfer.protection" = "privacy";
# "hadoop.rpc.protection" = "privacy";
# "dfs.http.policy" = "HTTP_ONLY";
# "dfs.datanode.address" = "0.0.0.0:10019";
# "dfs.datanode.http.address" = "0.0.0.0:10022";
# "dfs.datanode.https.address" = "0.0.0.0:10023";
# "dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE";
# "dfs.datanode.keytab.file" = keytab_path;
# "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
# "dfs.namenode.keytab.file" = keytab_path;
# "dfs.block.access.token.enable" = "true";
};
yarnSite = {
# "yarn.acl.enable" = "true";
# "yarn.admin.acl" = "*"; # TODO: restrict
"yarn.nodemanager.admin-env" = "PATH=$PATH";
"yarn.nodemanager.aux-services" = "mapreduce_shuffle";
"yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler";
"yarn.nodemanager.bind-host" = "0.0.0.0";
"yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
"yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ";
"yarn.nodemanager.linux-container-executor.group" = "hadoop";
"yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
"yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
"yarn.resourcemanager.bind-host" = "0.0.0.0";
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
"yarn.resourcemanager.keytab" = keytab_path;
"yarn.resourcemanager.principal" = "rm/my.engine@MY.ENGINE";
"yarn.nodemanager.keytab" = keytab_path;
"yarn.nodemanager.principal" = "nm/my.engine@MY.ENGINE";
# "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
"yarn.scheduler.capacity.root.queues" = "default";
"yarn.scheduler.capacity.root.default.capacity" = 100;
# "yarn.scheduler.capacity.root.default.state" = "RUNNING";
"yarn.scheduler.capacity.root.acl_submit_applications" = "hadoop,yarn,mapred,hdfs";
};
extraConfDirs = [ ];
@@ -190,7 +178,10 @@ in
zkfc = { enable = true; restartIfChanged = true; };
httpfs = { enable = true; restartIfChanged = true; };
};
yarn = { resourcemanager.enable = true; nodemanager.enable = true; };
yarn = {
resourcemanager = { enable = true; restartIfChanged = true; };
nodemanager = { enable = true; restartIfChanged = true; };
};
};
kerberos_server = {
@@ -236,8 +227,7 @@ in
'';
};
users.users.bertof.extraGroups = [ "hadoop" ];
systemd.services.spark-history = {
path = with pkgs; [ procps openssh nettools ];