diff --git a/nixos/big_data.nix b/nixos/big_data.nix
index 44e8aeb..d4d161e 100644
--- a/nixos/big_data.nix
+++ b/nixos/big_data.nix
@@ -40,6 +40,17 @@ let sparkConfDir = pkgs.stdenv.mkDerivation {
 
 in
 {
+  networking = {
+    hosts = {
+      "127.0.0.1" = [
+        "ds.my.engine"
+        "kdc.my.engine"
+        "my.engine"
+      ];
+    };
+
+  };
+
   services = {
     spark = {
       master = {
@@ -53,83 +64,183 @@ in
       confDir = sparkConfDir;
     };
 
-    hadoop = {
-      coreSite = {
-        "fs.defaultFS" = "hdfs://localhost:8020";
-      };
-      hdfsSite = {
-        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
-        "dfs.permissions" = "false";
-        "dfs.namenode.name.dir" = "/hdfs/dfs/name";
-        "dfs.datanode.data.dir" = "/hdfs/dfs/data";
-      };
-      hdfs = {
-        namenode = {
-          enable = true;
-          formatOnInit = true;
-          restartIfChanged = true;
+
+    hadoop =
+      let
+        # Keep this a string, not a Nix path literal: an interpolated path is
+        # copied into the world-readable /nix/store, which would leak the keytab.
+        keytab_path = "/etc/hadoop.keytab";
+      in
+
+      {
+        coreSite = {
+          # "fs.defaultFS" = "hdfs://0.0.0.0:8020";
+
+          # "hadoop.http.authentication.simple.anonymous.allowed" = "false";
+          # "hadoop.http.authentication.signature.secret.file" = "/var/lib/hadoop/security/http_secret";
+          # "hadoop.http.authentication.type" = "kerberos";
+          # "hadoop.http.authentication.kerberos.principal" = "http/my.engine@MY.ENGINE";
+          # "hadoop.http.authentication.cookie.domain" = "my.engine";
+
+          # "hadoop.security.authentication" = "kerberos";
+          # "hadoop.security.authorization" = "true";
+          # "hadoop.rpc.protection" = "authentication";
+
+
+          # "hadoop.rpc.protection" = "authentication";
+          # "hadoop.security.auth_to_local" = ''
+          #   RULE:[2:$1/$2@$0]([ndj]n/.*@MY.ENGINE)s/.*/hdfs/
+          #   RULE:[2:$1/$2@$0]([rn]m/.*@MY.ENGINE)s/.*/yarn/
+          #   RULE:[2:$1/$2@$0](jhs/.*@MY.ENGINE)s/.*/mapred/
+          #   DEFAULT
+          # '';
+          # "hadoop.proxyuser.superuser.hosts" = "*"; # TODO: restrict
+          # "hadoop.proxyuser.superuser.groups" = "*"; # TODO: restrict
+
+          "fs.defaultFS" = "hdfs://my.engine:8020";
+
+          # HDFS IMPERSONATION
+          "hadoop.proxyuser.hdfs.hosts" = "*";
+          "hadoop.proxyuser.hdfs.groups" = "*";
+
+          # HIVE IMPERSONATION
+          "hadoop.proxyuser.hive.hosts" = "*";
+          "hadoop.proxyuser.hive.groups" = "*";
+
+          # ENABLE AUTHENTICATION
+          "hadoop.security.authentication" = "kerberos";
+          "hadoop.security.authorization" = "true";
+          "hadoop.rpc.protection" = "privacy";
+
+          "hadoop.security.auth_to_local" = ''
+            RULE:[2:$1/$2@$0]([ndj]n/.*@MY\.ENGINE)s/.*/hdfs/
+            RULE:[2:$1/$2@$0]([rn]m/.*@MY\.ENGINE)s/.*/yarn/
+            RULE:[2:$1/$2@$0](jhs/.*@MY\.ENGINE)s/.*/mapred/
+            DEFAULT
+          '';
         };
-        datanode = {
-          enable = true;
-          restartIfChanged = true;
+        hdfsSite = {
+          # DATA
+          "dfs.namenode.name.dir" = "/hdfs/dfs/name";
+          "dfs.datanode.data.dir" = "/hdfs/dfs/data";
+
+          # HDFS SECURITY
+          "dfs.block.access.token.enable" = "true";
+
+          # NAME NODE SECURITY
+          "dfs.namenode.keytab.file" = keytab_path;
+          "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
+          "dfs.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE";
+
+          # SECONDARY NAME NODE SECURITY
+          "dfs.secondary.namenode.keytab.file" = keytab_path;
+          "dfs.secondary.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
+          "dfs.secondary.namenode.kerberos.internal.spnego.principal" = "HTTP/my.engine@MY.ENGINE";
+
+          # DATA NODE SECURITY
+          "dfs.datanode.keytab.file" = keytab_path;
+          "dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE";
+
+          # WEBHDFS SECURITY
+          "dfs.webhdfs.enabled" = "true";
+
+          # WEB AUTHENTICATION CONFIG
+          "dfs.web.authentication.kerberos.principal" = "HTTP/my.engine@MY.ENGINE";
+          "dfs.web.authentication.kerberos.keytab" = keytab_path;
+          "ignore.secure.ports.for.testing" = "true";
+          "dfs.http.policy" = "HTTP_ONLY";
+          "dfs.data.transfer.protection" = "privacy";
+
+          # ## MULTIHOMED
+          # "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+          # "dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
+          # "dfs.namenode.http-bind-host" = "0.0.0.0";
+          # "dfs.namenode.https-bind-host" = "0.0.0.0";
+          # "dfs.client.use.datanode.hostname" = "true"; # force connection by hostname
+          # "dfs.datanode.use.datanode.hostname" = "true"; # force connection by hostname
+
+
+          # "dfs.data.transfer.protection" = "privacy";
+          # "hadoop.rpc.protection" = "privacy";
+          # "dfs.http.policy" = "HTTP_ONLY";
+          # "dfs.datanode.address" = "0.0.0.0:10019";
+          # "dfs.datanode.http.address" = "0.0.0.0:10022";
+          # "dfs.datanode.https.address" = "0.0.0.0:10023";
+
+
+          # "dfs.datanode.kerberos.principal" = "dn/my.engine@MY.ENGINE";
+          # "dfs.datanode.keytab.file" = keytab_path;
+
+          # "dfs.namenode.kerberos.principal" = "nn/my.engine@MY.ENGINE";
+          # "dfs.namenode.keytab.file" = keytab_path;
+
+          # "dfs.block.access.token.enable" = "true";
+
+
         };
-        journalnode = {
-          enable = true;
-          restartIfChanged = true;
+        yarnSite = {
+          # "yarn.acl.enable" = "true";
+          # "yarn.admin.acl" = "*"; # TODO: restrict
         };
-        zkfc = {
-          enable = true;
-          restartIfChanged = true;
-        };
-        httpfs = {
-          enable = true;
-          restartIfChanged = true;
+        extraConfDirs = [ ];
+
+        hdfs = {
+          namenode = { enable = true; formatOnInit = true; restartIfChanged = true; };
+          datanode = { enable = true; restartIfChanged = true; };
+          journalnode = { enable = true; restartIfChanged = true; };
+          zkfc = { enable = true; restartIfChanged = true; };
+          httpfs = { enable = true; restartIfChanged = true; };
         };
+        yarn = { resourcemanager.enable = true; nodemanager.enable = true; };
       };
-      yarn = {
-        resourcemanager.enable = true;
-        nodemanager.enable = true;
-      };
-    };
 
     kerberos_server = {
       enable = true;
-      realms."ATHENA.MIT.EDU" = {
-        acl = [
-          { access = "all"; principal = "*/admin"; }
-          { access = "all"; principal = "admin"; }
-        ];
-      };
+      realms."MY.ENGINE".acl = [
+        { principal = "*/admin"; access = "all"; }
+        { principal = "admin"; access = "all"; }
+        { principal = "*/localhost"; access = "all"; }
+        { principal = "*/my.engine"; access = "all"; }
+        { principal = "nn/my.engine"; access = "all"; }
+        { principal = "hdfs"; access = "all"; }
+      ];
     };
   };
 
   krb5 = {
     enable = true;
-    realms."ATHENA.MIT.EDU" = {
-      admin_server = "localhost";
-      kdc = [
-        "localhost"
-      ];
-      kpasswd_server = "localhost";
+    realms = {
+      "MY.ENGINE" = {
+        admin_server = "kdc.my.engine";
+        kdc = "kdc.my.engine";
+        # default_domain = "my.engine";
+        # kpasswd_server = "odin";
+      };
     };
 
     domain_realm = {
-      ".athena.mit.edu" = "ATHENA.MIT.EDU";
-      "athena.mit.edu" = "ATHENA.MIT.EDU";
+      # ".my.engine" = "MY.ENGINE";
+      "my.engine" = "MY.ENGINE";
     };
 
     libdefaults = {
-      default_realm = "ATHENA.MIT.EDU";
-      dns_lookup_realm = false;
-      dns_lookup_kdc = false;
+      default_realm = "MY.ENGINE";
+      dns_lookup_realm = true;
+      dns_lookup_kdc = true;
+      ticket_lifetime = "24h";
+      renew_lifetime = "7d";
+      forwardable = true;
     };
 
     extraConfig = ''
       [logging]
-      default = FILE:/var/log/krb5.log
+      default = FILE:/var/log/krb5libs.log
+      kdc = FILE:/var/log/krb5kdc.log
+      admin_server = FILE:/var/log/kadmind.log
     '';
   };
 
+
   systemd.services.spark-history = {
     path = with pkgs; [ procps openssh nettools ];
     description = "spark history service.";