From 9e4879f7a0f8b36015d4b91c59bfe540f8d42d90 Mon Sep 17 00:00:00 2001
From: Filippo Berto <berto.f@protonmail.com>
Date: Mon, 21 Feb 2022 14:37:23 +0100
Subject: [PATCH] Big data config for laptop

---
 nixos/base.nix                          | 120 +++++++++++++-
 spark_conf/fairscheduler.xml.template   |  31 ----
 spark_conf/log4j.properties             |  46 ------
 spark_conf/log4j.properties.template    |  46 ------
 spark_conf/metrics.properties.template  | 210 ------------------------
 spark_conf/spark-defaults.conf          |  39 -----
 spark_conf/spark-defaults.conf.template |  27 ---
 spark_conf/spark-env.sh                 |   7 -
 spark_conf/spark-env.sh.template        |  73 --------
 spark_conf/workers.template             |  19 ---
 10 files changed, 119 insertions(+), 499 deletions(-)
 delete mode 100644 spark_conf/fairscheduler.xml.template
 delete mode 100644 spark_conf/log4j.properties
 delete mode 100644 spark_conf/log4j.properties.template
 delete mode 100644 spark_conf/metrics.properties.template
 delete mode 100644 spark_conf/spark-defaults.conf
 delete mode 100644 spark_conf/spark-defaults.conf.template
 delete mode 100644 spark_conf/spark-env.sh
 delete mode 100755 spark_conf/spark-env.sh.template
 delete mode 100644 spark_conf/workers.template

diff --git a/nixos/base.nix b/nixos/base.nix
index 4896ec9..4a68410 100644
--- a/nixos/base.nix
+++ b/nixos/base.nix
@@ -1,4 +1,46 @@
-{ config, pkgs, lib, ... }: {
+{ config, pkgs, lib, ... }:
+
+let sparkConfDir = pkgs.stdenv.mkDerivation {
+  name = "spark-config";
+  dontUnpack = true; # no src: the output is generated entirely in installPhase
+  installPhase = ''
+    # source standard environment
+    . $stdenv/setup
+
+    # conf directory shipped with the spark package
+    base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/
+
+    # create output dirs for new derivation
+    mkdir -p $out/
+
+    # link unchanged template files from the original spark conf directory
+    for f in $base_conf/*.template ; do
+       ln -sf $f $out/
+    done
+
+    # change selected files: log4j uses the stock template as-is
+    cp $out/log4j.properties{.template,}
+    # spark-env.sh: \$VAR stays literal for runtime, the rest expands at build time
+    cat > $out/spark-env.sh <<- STOP
+    export JAVA_HOME="${pkgs.jdk8}"
+    export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
+    export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
+    export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
+    export PYTHONPATH="\$PYTHONPATH:${pkgs.python3Packages.python}/${pkgs.python3Packages.python.sitePackages}"
+    export SPARKR_R_SHELL="${pkgs.R}/bin/R"
+    export PATH="\$PATH:${pkgs.R}/bin"
+    STOP
+    # spark-defaults.conf: event logs on the local HDFS (port as in fs.defaultFS)
+    cat > $out/spark-defaults.conf <<- STOP
+    spark.eventLog.enabled                  true
+    spark.eventLog.dir                      hdfs://localhost:8020/logs/spark
+    spark.history.fs.logDirectory           hdfs://localhost:8020/logs/spark
+    STOP
+  '';
+};
+in
+
+{
 
   imports = [
     <nixos-hardware/common/cpu/intel>
@@ -210,6 +252,82 @@
   services.power-profiles-daemon.enable = true;
 
   # services.teamviewer.enable = true;
+  # Single-node big-data stack: Spark standalone (master + worker) next to
+  # Hadoop HDFS and YARN, all on this machine.
+  services = {
+
+    # Both Spark daemons read the generated configuration directory.
+    spark = {
+      confDir = sparkConfDir;
+      master.enable = true;
+      master.restartIfChanged = true;
+      worker.enable = true;
+      worker.restartIfChanged = true;
+    };
+
+    # Hadoop: site properties, HDFS roles and YARN.
+    hadoop = {
+      # Default filesystem URI for Hadoop clients.
+      coreSite."fs.defaultFS" = "hdfs://localhost:8020";
+
+      # NameNode RPC bind host is the wildcard address; dfs.permissions is off.
+      hdfsSite = {
+        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+        "dfs.permissions" = "false";
+      };
+
+      # HDFS roles, each enabled with restartIfChanged set.
+      hdfs = {
+        namenode = {
+          enable = true;
+          formatOnInit = true;
+          restartIfChanged = true;
+        };
+
+        datanode.enable = true;
+        datanode.restartIfChanged = true;
+
+        journalnode.enable = true;
+        journalnode.restartIfChanged = true;
+
+        zkfc.enable = true;
+        zkfc.restartIfChanged = true;
+
+        httpfs.enable = true;
+        httpfs.restartIfChanged = true;
+      };
+
+      # YARN resource manager and node manager.
+      yarn = {
+        resourcemanager.enable = true;
+        nodemanager.enable = true;
+      };
+    };
+  };
+
+  # Spark history server as a forking systemd unit run by the spark user.
+  systemd.services.spark-history = let
+    sparkHome = "${pkgs.spark}/lib/${pkgs.spark.untarDir}";
+  in {
+    description = "spark history service.";
+    wantedBy = [ "multi-user.target" ];
+    after = [ "network.target" ];
+    restartIfChanged = true;
+    path = [ pkgs.procps pkgs.openssh pkgs.nettools ];
+    environment = { SPARK_CONF_DIR = sparkConfDir; SPARK_LOG_DIR = "/var/log/spark"; };
+    serviceConfig = {
+      Type = "forking";
+      User = "spark";
+      Group = "spark";
+      WorkingDirectory = sparkHome;
+      ExecStart = "${sparkHome}/sbin/start-history-server.sh";
+      ExecStop = "${sparkHome}/sbin/stop-history-server.sh";
+      TimeoutSec = 300;
+      StartLimitBurst = 10;
+      Restart = "always";
+    };
+  };
+
 
   # Virtualisation
   virtualisation = {
diff --git a/spark_conf/fairscheduler.xml.template b/spark_conf/fairscheduler.xml.template
deleted file mode 100644
index 385b2e7..0000000
--- a/spark_conf/fairscheduler.xml.template
+++ /dev/null
@@ -1,31 +0,0 @@
-<?xml version="1.0"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<allocations>
-  <pool name="production">
-    <schedulingMode>FAIR</schedulingMode>
-    <weight>1</weight>
-    <minShare>2</minShare>
-  </pool>
-  <pool name="test">
-    <schedulingMode>FIFO</schedulingMode>
-    <weight>2</weight>
-    <minShare>3</minShare>
-  </pool>
-</allocations>
diff --git a/spark_conf/log4j.properties b/spark_conf/log4j.properties
deleted file mode 100644
index dc7b9ea..0000000
--- a/spark_conf/log4j.properties
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Set everything to be logged to the console
-log4j.rootCategory=INFO, console
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
-
-# Set the default spark-shell log level to WARN. When running the spark-shell, the
-# log level for this class is used to overwrite the root logger's log level, so that
-# the user can have different defaults for the shell and regular Spark apps.
-log4j.logger.org.apache.spark.repl.Main=WARN
-
-# Settings to quiet third party logs that are too verbose
-log4j.logger.org.sparkproject.jetty=WARN
-log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
-log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
-log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
-log4j.logger.org.apache.parquet=ERROR
-log4j.logger.parquet=ERROR
-
-# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
-log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
-log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
-
-# For deploying Spark ThriftServer
-# SPARK-34128：Suppress undesirable TTransportException warnings involved in THRIFT-4805
-log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter
-log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message
-log4j.appender.console.filter.1.AcceptOnMatch=false
diff --git a/spark_conf/log4j.properties.template b/spark_conf/log4j.properties.template
deleted file mode 100644
index dc7b9ea..0000000
--- a/spark_conf/log4j.properties.template
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Set everything to be logged to the console
-log4j.rootCategory=INFO, console
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
-
-# Set the default spark-shell log level to WARN. When running the spark-shell, the
-# log level for this class is used to overwrite the root logger's log level, so that
-# the user can have different defaults for the shell and regular Spark apps.
-log4j.logger.org.apache.spark.repl.Main=WARN
-
-# Settings to quiet third party logs that are too verbose
-log4j.logger.org.sparkproject.jetty=WARN
-log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
-log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
-log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
-log4j.logger.org.apache.parquet=ERROR
-log4j.logger.parquet=ERROR
-
-# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
-log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
-log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
-
-# For deploying Spark ThriftServer
-# SPARK-34128：Suppress undesirable TTransportException warnings involved in THRIFT-4805
-log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter
-log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message
-log4j.appender.console.filter.1.AcceptOnMatch=false
diff --git a/spark_conf/metrics.properties.template b/spark_conf/metrics.properties.template
deleted file mode 100644
index f52d33f..0000000
--- a/spark_conf/metrics.properties.template
+++ /dev/null
@@ -1,210 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#  syntax: [instance].sink|source.[name].[options]=[value]
-
-#  This file configures Spark's internal metrics system. The metrics system is
-#  divided into instances which correspond to internal components.
-#  Each instance can be configured to report its metrics to one or more sinks.
-#  Accepted values for [instance] are "master", "worker", "executor", "driver",
-#  and "applications". A wildcard "*" can be used as an instance name, in
-#  which case all instances will inherit the supplied property.
-#
-#  Within an instance, a "source" specifies a particular set of grouped metrics.
-#  there are two kinds of sources:
-#    1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
-#    collect a Spark component's internal state. Each instance is paired with a
-#    Spark source that is added automatically.
-#    2. Common sources, like JvmSource, which will collect low level state.
-#    These can be added through configuration options and are then loaded
-#    using reflection.
-#
-#  A "sink" specifies where metrics are delivered to. Each instance can be
-#  assigned one or more sinks.
-#
-#  The sink|source field specifies whether the property relates to a sink or
-#  source.
-#
-#  The [name] field specifies the name of source or sink.
-#
-#  The [options] field is the specific property of this source or sink. The
-#  source or sink is responsible for parsing this property.
-#
-#  Notes:
-#    1. To add a new sink, set the "class" option to a fully qualified class
-#    name (see examples below).
-#    2. Some sinks involve a polling period. The minimum allowed polling period
-#    is 1 second.
-#    3. Wildcard properties can be overridden by more specific properties.
-#    For example, master.sink.console.period takes precedence over
-#    *.sink.console.period.
-#    4. A metrics specific configuration
-#    "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
-#    added to Java properties using -Dspark.metrics.conf=xxx if you want to
-#    customize metrics system. You can also put the file in ${SPARK_HOME}/conf
-#    and it will be loaded automatically.
-#    5. The MetricsServlet sink is added by default as a sink in the master,
-#    worker and driver, and you can send HTTP requests to the "/metrics/json"
-#    endpoint to get a snapshot of all the registered metrics in JSON format.
-#    For master, requests to the "/metrics/master/json" and
-#    "/metrics/applications/json" endpoints can be sent separately to get
-#    metrics snapshots of the master instance and applications. This
-#    MetricsServlet does not have to be configured.
-#    6. The metrics system can also be configured using Spark configuration
-#    parameters. The relevant parameter names are formed by adding the
-#    prefix "spark.metrics.conf." to the configuration entries detailed in
-#    this file (see examples below).
-
-## List of available common sources and their properties.
-
-# org.apache.spark.metrics.source.JvmSource
-#   Note: Currently, JvmSource is the only available common source.
-#         It can be added to an instance by setting the "class" option to its
-#         fully qualified class name (see examples below).
-
-## List of available sinks and their properties.
-
-# org.apache.spark.metrics.sink.ConsoleSink
-#   Name:   Default:   Description:
-#   period  10         Poll period
-#   unit    seconds    Unit of the poll period
-
-# org.apache.spark.metrics.sink.CSVSink
-#   Name:     Default:   Description:
-#   period    10         Poll period
-#   unit      seconds    Unit of the poll period
-#   directory /tmp       Where to store CSV files
-
-# org.apache.spark.metrics.sink.GangliaSink
-#   Name:     Default:   Description:
-#   host      NONE       Hostname or multicast group of the Ganglia server,
-#                        must be set
-#   port      NONE       Port of the Ganglia server(s), must be set
-#   period    10         Poll period
-#   unit      seconds    Unit of the poll period
-#   ttl       1          TTL of messages sent by Ganglia
-#   dmax      0          Lifetime in seconds of metrics (0 never expired)
-#   mode      multicast  Ganglia network mode ('unicast' or 'multicast')
-
-# org.apache.spark.metrics.sink.JmxSink
-
-# org.apache.spark.metrics.sink.MetricsServlet
-#   Name:     Default:   Description:
-#   path      VARIES*    Path prefix from the web server root
-#   sample    false      Whether to show entire set of samples for histograms
-#                        ('false' or 'true')
-#
-# * Default path is /metrics/json for all instances except the master. The
-#   master has two paths:
-#     /metrics/applications/json # App information
-#     /metrics/master/json       # Master information
-
-# org.apache.spark.metrics.sink.PrometheusServlet
-#   Name:     Default:   Description:
-#   path      VARIES*    Path prefix from the web server root
-#
-# * Default path is /metrics/prometheus for all instances except the master. The
-#   master has two paths:
-#     /metrics/applications/prometheus # App information
-#     /metrics/master/prometheus       # Master information
-
-# org.apache.spark.metrics.sink.GraphiteSink
-#   Name:     Default:      Description:
-#   host      NONE          Hostname of the Graphite server, must be set
-#   port      NONE          Port of the Graphite server, must be set
-#   period    10            Poll period
-#   unit      seconds       Unit of the poll period
-#   prefix    EMPTY STRING  Prefix to prepend to every metric's name
-#   protocol  tcp           Protocol ("tcp" or "udp") to use
-#   regex     NONE          Optional filter to send only metrics matching this regex string
-
-# org.apache.spark.metrics.sink.StatsdSink
-#   Name:     Default:      Description:
-#   host      127.0.0.1     Hostname or IP of StatsD server
-#   port      8125          Port of StatsD server
-#   period    10            Poll period
-#   unit      seconds       Units of poll period
-#   prefix    EMPTY STRING  Prefix to prepend to metric name
-
-## Examples
-# Enable JmxSink for all instances by class name
-#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
-
-# Enable ConsoleSink for all instances by class name
-#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
-
-# Enable StatsdSink for all instances by class name
-#*.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink
-#*.sink.statsd.prefix=spark
-
-# Polling period for the ConsoleSink
-#*.sink.console.period=10
-# Unit of the polling period for the ConsoleSink
-#*.sink.console.unit=seconds
-
-# Polling period for the ConsoleSink specific for the master instance
-#master.sink.console.period=15
-# Unit of the polling period for the ConsoleSink specific for the master
-# instance
-#master.sink.console.unit=seconds
-
-# Enable CsvSink for all instances by class name
-#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink
-
-# Polling period for the CsvSink
-#*.sink.csv.period=1
-# Unit of the polling period for the CsvSink
-#*.sink.csv.unit=minutes
-
-# Polling directory for CsvSink
-#*.sink.csv.directory=/tmp/
-
-# Polling period for the CsvSink specific for the worker instance
-#worker.sink.csv.period=10
-# Unit of the polling period for the CsvSink specific for the worker instance
-#worker.sink.csv.unit=minutes
-
-# Enable Slf4jSink for all instances by class name
-#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
-
-# Polling period for the Slf4JSink
-#*.sink.slf4j.period=1
-# Unit of the polling period for the Slf4jSink
-#*.sink.slf4j.unit=minutes
-
-# Example configuration for Graphite sink
-#*.sink.graphite.class=org.apache.spark.metrics.sink.GraphiteSink
-#*.sink.graphite.host=<graphiteEndPoint_hostName>
-#*.sink.graphite.port=<listening_port>
-#*.sink.graphite.period=10
-#*.sink.graphite.unit=seconds
-#*.sink.graphite.prefix=<optional_value>
-
-# Enable JvmSource for instance master, worker, driver and executor
-#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
-
-#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
-
-#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
-
-#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
-
-# Example configuration for PrometheusServlet
-#*.sink.prometheusServlet.class=org.apache.spark.metrics.sink.PrometheusServlet
-#*.sink.prometheusServlet.path=/metrics/prometheus
-#master.sink.prometheusServlet.path=/metrics/master/prometheus
-#applications.sink.prometheusServlet.path=/metrics/applications/prometheus
diff --git a/spark_conf/spark-defaults.conf b/spark_conf/spark-defaults.conf
deleted file mode 100644
index 678d9c8..0000000
--- a/spark_conf/spark-defaults.conf
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Default system properties included when running spark-submit.
-# This is useful for setting default environmental settings.
-
-# Example:
-# spark.master                     spark://master:7077
-# spark.eventLog.enabled           true
-# spark.eventLog.dir               hdfs://namenode:8021/directory
-# spark.serializer                 org.apache.spark.serializer.KryoSerializer
-# spark.driver.memory              5g
-# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
-
-# spark.io.compression.codec	  lzf
-# spark.io.compression.codec	  org.apache.spark.io.SnappyCompressionCodec
-# spark.eventLog.compress			  false
-
-spark.eventLog.enabled          		true
-spark.eventLog.dir              		hdfs://localhost:/logs/spark
-spark.history.fs.logDirectory   		hdfs://localhost:/logs/spark
-
-
-#
-#
diff --git a/spark_conf/spark-defaults.conf.template b/spark_conf/spark-defaults.conf.template
deleted file mode 100644
index 19cba6e..0000000
--- a/spark_conf/spark-defaults.conf.template
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Default system properties included when running spark-submit.
-# This is useful for setting default environmental settings.
-
-# Example:
-# spark.master                     spark://master:7077
-# spark.eventLog.enabled           true
-# spark.eventLog.dir               hdfs://namenode:8021/directory
-# spark.serializer                 org.apache.spark.serializer.KryoSerializer
-# spark.driver.memory              5g
-# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
diff --git a/spark_conf/spark-env.sh b/spark_conf/spark-env.sh
deleted file mode 100644
index 1d67e58..0000000
--- a/spark_conf/spark-env.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-export JAVA_HOME="/nix/store/d0akdmr675jrlabv7n8syg8yrg1zlyxz-openjdk-8u272-b10"
-export SPARK_HOME="/nix/store/zhj5q1pi0bs2lpc0lbkw8qkg03ywx9b8-spark-3.1.2/lib/spark-3.1.2"
-export SPARK_DIST_CLASSPATH=/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/common/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/common/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/mapreduce/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn/*
-export PYSPARK_PYTHON="/nix/store/2c9w4p2x6x0l64fdvcmc11app7x4xran-python3-3.9.6/bin/python3.9"
-export PYTHONPATH="$PYTHONPATH:/nix/store/2c9w4p2x6x0l64fdvcmc11app7x4xran-python3-3.9.6/lib/python3.9/site-packages"
-export SPARKR_R_SHELL="/nix/store/h1s3y5jjrwdm5gd2qyxp2ldsnykippcb-R-4.1.2/bin/R"
-export PATH="$PATH:/nix/store/h1s3y5jjrwdm5gd2qyxp2ldsnykippcb-R-4.1.2/bin"
diff --git a/spark_conf/spark-env.sh.template b/spark_conf/spark-env.sh.template
deleted file mode 100755
index c868650..0000000
--- a/spark_conf/spark-env.sh.template
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/nix/store/vfai0jim0db67nk9rd7ziq29jxb5n79n-bash-5.1-p8/bin/bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# This file is sourced when running various Spark programs.
-# Copy it as spark-env.sh and edit that to configure Spark for your site.
-
-# Options read when launching programs locally with
-# ./bin/run-example or ./bin/spark-submit
-# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
-# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
-# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
-
-# Options read by executors and drivers running inside the cluster
-# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
-# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
-# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
-# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
-
-# Options read in YARN client/cluster mode
-# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
-# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
-# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
-# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
-# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
-# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
-
-# Options for the daemons used in the standalone deploy mode
-# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
-# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
-# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
-# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
-# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
-# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
-# - SPARK_WORKER_DIR, to set the working directory of worker processes
-# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
-# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
-# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
-# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
-# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
-# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
-# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
-
-# Options for launcher
-# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")
-
-# Generic options for the daemons used in the standalone deploy mode
-# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
-# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
-# - SPARK_LOG_MAX_FILES Max log files of Spark daemons can rotate to. Default is 5.
-# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
-# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
-# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
-# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
-# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
-# You might get better performance to enable these options if using native BLAS (see SPARK-21305).
-# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
-# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS
diff --git a/spark_conf/workers.template b/spark_conf/workers.template
deleted file mode 100644
index be42a63..0000000
--- a/spark_conf/workers.template
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# A Spark Worker will be started on each of the machines listed below.
-localhost
\ No newline at end of file