From 9e4879f7a0f8b36015d4b91c59bfe540f8d42d90 Mon Sep 17 00:00:00 2001 From: Filippo Berto Date: Mon, 21 Feb 2022 14:37:23 +0100 Subject: [PATCH] Big data config for laptop --- nixos/base.nix | 120 +++++++++++++- spark_conf/fairscheduler.xml.template | 31 ---- spark_conf/log4j.properties | 46 ------ spark_conf/log4j.properties.template | 46 ------ spark_conf/metrics.properties.template | 210 ------------------------ spark_conf/spark-defaults.conf | 39 ----- spark_conf/spark-defaults.conf.template | 27 --- spark_conf/spark-env.sh | 7 - spark_conf/spark-env.sh.template | 73 -------- spark_conf/workers.template | 19 --- 10 files changed, 119 insertions(+), 499 deletions(-) delete mode 100644 spark_conf/fairscheduler.xml.template delete mode 100644 spark_conf/log4j.properties delete mode 100644 spark_conf/log4j.properties.template delete mode 100644 spark_conf/metrics.properties.template delete mode 100644 spark_conf/spark-defaults.conf delete mode 100644 spark_conf/spark-defaults.conf.template delete mode 100644 spark_conf/spark-env.sh delete mode 100755 spark_conf/spark-env.sh.template delete mode 100644 spark_conf/workers.template diff --git a/nixos/base.nix b/nixos/base.nix index 4896ec9..4a68410 100644 --- a/nixos/base.nix +++ b/nixos/base.nix @@ -1,4 +1,46 @@ -{ config, pkgs, lib, ... }: { +{ config, pkgs, lib, ... }: + +let sparkConfDir = pkgs.stdenv.mkDerivation { + name = "spark-config"; + dontUnpack = true; + installPhase = '' + # source standard environment + . 
$stdenv/setup + + # shorthands + base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/ + + # create output dirs for new derivation + mkdir -p $out/ + + # link unchanged template files from the original Spark conf directory + for f in $base_conf/*.template ; do + ln -sf $f $out/ + done + + # generate the site-specific files (log4j.properties, spark-env.sh, spark-defaults.conf) + cp $out/log4j.properties{.template,} + + cat > $out/spark-env.sh <<- STOP + export JAVA_HOME="${pkgs.jdk8}" + export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}" + export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath) + export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}" + export PYTHONPATH="\$PYTHONPATH:${pkgs.python3Packages.python}/${pkgs.python3Packages.python.sitePackages}" + export SPARKR_R_SHELL="${pkgs.R}/bin/R" + export PATH="\$PATH:${pkgs.R}/bin" + STOP + + cat > $out/spark-defaults.conf <<- STOP + spark.eventLog.enabled true + spark.eventLog.dir hdfs://localhost:/logs/spark + spark.history.fs.logDirectory hdfs://localhost:/logs/spark + STOP + ''; +}; +in + +{ imports = [ @@ -210,6 +252,82 @@ services.power-profiles-daemon.enable = true; # services.teamviewer.enable = true; + services = { + + spark = { + master = { + enable = true; + restartIfChanged = true; + }; + worker = { + enable = true; + restartIfChanged = true; + }; + confDir = sparkConfDir; + }; + + hadoop = { + coreSite = { + "fs.defaultFS" = "hdfs://localhost:8020"; + }; + hdfsSite = { + "dfs.namenode.rpc-bind-host" = "0.0.0.0"; + "dfs.permissions" = "false"; + }; + + hdfs = { + namenode = { + enable = true; + formatOnInit = true; + restartIfChanged = true; + }; + datanode = { + enable = true; + restartIfChanged = true; + }; + journalnode = { + enable = true; + restartIfChanged = true; + }; + zkfc = { + enable = true; + restartIfChanged = true; + }; + httpfs = { + enable = true; + restartIfChanged = true; + }; + }; + yarn = { + resourcemanager.enable = true; + nodemanager.enable = true; + }; + }; + }; + + systemd.services.spark-history = { + path = with pkgs; [ procps openssh nettools ]; + 
description = "spark history service."; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + restartIfChanged = true; + environment = { + SPARK_CONF_DIR = sparkConfDir; + SPARK_LOG_DIR = "/var/log/spark"; + }; + serviceConfig = { + Type = "forking"; + User = "spark"; + Group = "spark"; + WorkingDirectory = "${pkgs.spark}/lib/${pkgs.spark.untarDir}"; + ExecStart = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh"; + ExecStop = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/stop-history-server.sh"; + TimeoutSec = 300; + StartLimitBurst = 10; + Restart = "always"; + }; + }; + # Virtualisation virtualisation = { diff --git a/spark_conf/fairscheduler.xml.template b/spark_conf/fairscheduler.xml.template deleted file mode 100644 index 385b2e7..0000000 --- a/spark_conf/fairscheduler.xml.template +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - FAIR - 1 - 2 - - - FIFO - 2 - 3 - - diff --git a/spark_conf/log4j.properties b/spark_conf/log4j.properties deleted file mode 100644 index dc7b9ea..0000000 --- a/spark_conf/log4j.properties +++ /dev/null @@ -1,46 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# Set everything to be logged to the console -log4j.rootCategory=INFO, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n - -# Set the default spark-shell log level to WARN. When running the spark-shell, the -# log level for this class is used to overwrite the root logger's log level, so that -# the user can have different defaults for the shell and regular Spark apps. -log4j.logger.org.apache.spark.repl.Main=WARN - -# Settings to quiet third party logs that are too verbose -log4j.logger.org.sparkproject.jetty=WARN -log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -log4j.logger.org.apache.parquet=ERROR -log4j.logger.parquet=ERROR - -# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support -log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL -log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR - -# For deploying Spark ThriftServer -# SPARK-34128:Suppress undesirable TTransportException warnings involved in THRIFT-4805 -log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter -log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message -log4j.appender.console.filter.1.AcceptOnMatch=false diff --git a/spark_conf/log4j.properties.template b/spark_conf/log4j.properties.template deleted file mode 100644 index dc7b9ea..0000000 --- a/spark_conf/log4j.properties.template +++ /dev/null @@ -1,46 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Set everything to be logged to the console -log4j.rootCategory=INFO, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n - -# Set the default spark-shell log level to WARN. When running the spark-shell, the -# log level for this class is used to overwrite the root logger's log level, so that -# the user can have different defaults for the shell and regular Spark apps. 
-log4j.logger.org.apache.spark.repl.Main=WARN - -# Settings to quiet third party logs that are too verbose -log4j.logger.org.sparkproject.jetty=WARN -log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -log4j.logger.org.apache.parquet=ERROR -log4j.logger.parquet=ERROR - -# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support -log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL -log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR - -# For deploying Spark ThriftServer -# SPARK-34128:Suppress undesirable TTransportException warnings involved in THRIFT-4805 -log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter -log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message -log4j.appender.console.filter.1.AcceptOnMatch=false diff --git a/spark_conf/metrics.properties.template b/spark_conf/metrics.properties.template deleted file mode 100644 index f52d33f..0000000 --- a/spark_conf/metrics.properties.template +++ /dev/null @@ -1,210 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# syntax: [instance].sink|source.[name].[options]=[value] - -# This file configures Spark's internal metrics system. The metrics system is -# divided into instances which correspond to internal components. -# Each instance can be configured to report its metrics to one or more sinks. -# Accepted values for [instance] are "master", "worker", "executor", "driver", -# and "applications". A wildcard "*" can be used as an instance name, in -# which case all instances will inherit the supplied property. -# -# Within an instance, a "source" specifies a particular set of grouped metrics. -# there are two kinds of sources: -# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will -# collect a Spark component's internal state. Each instance is paired with a -# Spark source that is added automatically. -# 2. Common sources, like JvmSource, which will collect low level state. -# These can be added through configuration options and are then loaded -# using reflection. -# -# A "sink" specifies where metrics are delivered to. Each instance can be -# assigned one or more sinks. -# -# The sink|source field specifies whether the property relates to a sink or -# source. -# -# The [name] field specifies the name of source or sink. -# -# The [options] field is the specific property of this source or sink. The -# source or sink is responsible for parsing this property. -# -# Notes: -# 1. To add a new sink, set the "class" option to a fully qualified class -# name (see examples below). -# 2. Some sinks involve a polling period. The minimum allowed polling period -# is 1 second. -# 3. Wildcard properties can be overridden by more specific properties. -# For example, master.sink.console.period takes precedence over -# *.sink.console.period. -# 4. 
A metrics specific configuration -# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be -# added to Java properties using -Dspark.metrics.conf=xxx if you want to -# customize metrics system. You can also put the file in ${SPARK_HOME}/conf -# and it will be loaded automatically. -# 5. The MetricsServlet sink is added by default as a sink in the master, -# worker and driver, and you can send HTTP requests to the "/metrics/json" -# endpoint to get a snapshot of all the registered metrics in JSON format. -# For master, requests to the "/metrics/master/json" and -# "/metrics/applications/json" endpoints can be sent separately to get -# metrics snapshots of the master instance and applications. This -# MetricsServlet does not have to be configured. -# 6. The metrics system can also be configured using Spark configuration -# parameters. The relevant parameter names are formed by adding the -# prefix "spark.metrics.conf." to the configuration entries detailed in -# this file (see examples below). - -## List of available common sources and their properties. - -# org.apache.spark.metrics.source.JvmSource -# Note: Currently, JvmSource is the only available common source. -# It can be added to an instance by setting the "class" option to its -# fully qualified class name (see examples below). - -## List of available sinks and their properties. 
- -# org.apache.spark.metrics.sink.ConsoleSink -# Name: Default: Description: -# period 10 Poll period -# unit seconds Unit of the poll period - -# org.apache.spark.metrics.sink.CSVSink -# Name: Default: Description: -# period 10 Poll period -# unit seconds Unit of the poll period -# directory /tmp Where to store CSV files - -# org.apache.spark.metrics.sink.GangliaSink -# Name: Default: Description: -# host NONE Hostname or multicast group of the Ganglia server, -# must be set -# port NONE Port of the Ganglia server(s), must be set -# period 10 Poll period -# unit seconds Unit of the poll period -# ttl 1 TTL of messages sent by Ganglia -# dmax 0 Lifetime in seconds of metrics (0 never expired) -# mode multicast Ganglia network mode ('unicast' or 'multicast') - -# org.apache.spark.metrics.sink.JmxSink - -# org.apache.spark.metrics.sink.MetricsServlet -# Name: Default: Description: -# path VARIES* Path prefix from the web server root -# sample false Whether to show entire set of samples for histograms -# ('false' or 'true') -# -# * Default path is /metrics/json for all instances except the master. The -# master has two paths: -# /metrics/applications/json # App information -# /metrics/master/json # Master information - -# org.apache.spark.metrics.sink.PrometheusServlet -# Name: Default: Description: -# path VARIES* Path prefix from the web server root -# -# * Default path is /metrics/prometheus for all instances except the master. 
The -# master has two paths: -# /metrics/applications/prometheus # App information -# /metrics/master/prometheus # Master information - -# org.apache.spark.metrics.sink.GraphiteSink -# Name: Default: Description: -# host NONE Hostname of the Graphite server, must be set -# port NONE Port of the Graphite server, must be set -# period 10 Poll period -# unit seconds Unit of the poll period -# prefix EMPTY STRING Prefix to prepend to every metric's name -# protocol tcp Protocol ("tcp" or "udp") to use -# regex NONE Optional filter to send only metrics matching this regex string - -# org.apache.spark.metrics.sink.StatsdSink -# Name: Default: Description: -# host 127.0.0.1 Hostname or IP of StatsD server -# port 8125 Port of StatsD server -# period 10 Poll period -# unit seconds Units of poll period -# prefix EMPTY STRING Prefix to prepend to metric name - -## Examples -# Enable JmxSink for all instances by class name -#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink - -# Enable ConsoleSink for all instances by class name -#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink - -# Enable StatsdSink for all instances by class name -#*.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink -#*.sink.statsd.prefix=spark - -# Polling period for the ConsoleSink -#*.sink.console.period=10 -# Unit of the polling period for the ConsoleSink -#*.sink.console.unit=seconds - -# Polling period for the ConsoleSink specific for the master instance -#master.sink.console.period=15 -# Unit of the polling period for the ConsoleSink specific for the master -# instance -#master.sink.console.unit=seconds - -# Enable CsvSink for all instances by class name -#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink - -# Polling period for the CsvSink -#*.sink.csv.period=1 -# Unit of the polling period for the CsvSink -#*.sink.csv.unit=minutes - -# Polling directory for CsvSink -#*.sink.csv.directory=/tmp/ - -# Polling period for the CsvSink specific for the worker instance 
-#worker.sink.csv.period=10 -# Unit of the polling period for the CsvSink specific for the worker instance -#worker.sink.csv.unit=minutes - -# Enable Slf4jSink for all instances by class name -#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink - -# Polling period for the Slf4JSink -#*.sink.slf4j.period=1 -# Unit of the polling period for the Slf4jSink -#*.sink.slf4j.unit=minutes - -# Example configuration for Graphite sink -#*.sink.graphite.class=org.apache.spark.metrics.sink.GraphiteSink -#*.sink.graphite.host= -#*.sink.graphite.port= -#*.sink.graphite.period=10 -#*.sink.graphite.unit=seconds -#*.sink.graphite.prefix= - -# Enable JvmSource for instance master, worker, driver and executor -#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource - -#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource - -#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource - -#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource - -# Example configuration for PrometheusServlet -#*.sink.prometheusServlet.class=org.apache.spark.metrics.sink.PrometheusServlet -#*.sink.prometheusServlet.path=/metrics/prometheus -#master.sink.prometheusServlet.path=/metrics/master/prometheus -#applications.sink.prometheusServlet.path=/metrics/applications/prometheus diff --git a/spark_conf/spark-defaults.conf b/spark_conf/spark-defaults.conf deleted file mode 100644 index 678d9c8..0000000 --- a/spark_conf/spark-defaults.conf +++ /dev/null @@ -1,39 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Default system properties included when running spark-submit. -# This is useful for setting default environmental settings. - -# Example: -# spark.master spark://master:7077 -# spark.eventLog.enabled true -# spark.eventLog.dir hdfs://namenode:8021/directory -# spark.serializer org.apache.spark.serializer.KryoSerializer -# spark.driver.memory 5g -# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" - -# spark.io.compression.codec lzf -# spark.io.compression.codec org.apache.spark.io.SnappyCompressionCodec -# spark.eventLog.compress false - -spark.eventLog.enabled true -spark.eventLog.dir hdfs://localhost:/logs/spark -spark.history.fs.logDirectory hdfs://localhost:/logs/spark - - -# -# diff --git a/spark_conf/spark-defaults.conf.template b/spark_conf/spark-defaults.conf.template deleted file mode 100644 index 19cba6e..0000000 --- a/spark_conf/spark-defaults.conf.template +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Default system properties included when running spark-submit. -# This is useful for setting default environmental settings. - -# Example: -# spark.master spark://master:7077 -# spark.eventLog.enabled true -# spark.eventLog.dir hdfs://namenode:8021/directory -# spark.serializer org.apache.spark.serializer.KryoSerializer -# spark.driver.memory 5g -# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" diff --git a/spark_conf/spark-env.sh b/spark_conf/spark-env.sh deleted file mode 100644 index 1d67e58..0000000 --- a/spark_conf/spark-env.sh +++ /dev/null @@ -1,7 +0,0 @@ -export JAVA_HOME="/nix/store/d0akdmr675jrlabv7n8syg8yrg1zlyxz-openjdk-8u272-b10" -export SPARK_HOME="/nix/store/zhj5q1pi0bs2lpc0lbkw8qkg03ywx9b8-spark-3.1.2/lib/spark-3.1.2" -export 
SPARK_DIST_CLASSPATH=/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/common/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/common/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/mapreduce/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn/* -export PYSPARK_PYTHON="/nix/store/2c9w4p2x6x0l64fdvcmc11app7x4xran-python3-3.9.6/bin/python3.9" -export PYTHONPATH="$PYTHONPATH:/nix/store/2c9w4p2x6x0l64fdvcmc11app7x4xran-python3-3.9.6/lib/python3.9/site-packages" -export SPARKR_R_SHELL="/nix/store/h1s3y5jjrwdm5gd2qyxp2ldsnykippcb-R-4.1.2/bin/R" -export PATH="$PATH:/nix/store/h1s3y5jjrwdm5gd2qyxp2ldsnykippcb-R-4.1.2/bin" diff --git a/spark_conf/spark-env.sh.template b/spark_conf/spark-env.sh.template deleted file mode 100755 index c868650..0000000 --- a/spark_conf/spark-env.sh.template +++ /dev/null @@ -1,73 +0,0 @@ -#!/nix/store/vfai0jim0db67nk9rd7ziq29jxb5n79n-bash-5.1-p8/bin/bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This file is sourced when running various Spark programs. -# Copy it as spark-env.sh and edit that to configure Spark for your site. - -# Options read when launching programs locally with -# ./bin/run-example or ./bin/spark-submit -# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files -# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node -# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program - -# Options read by executors and drivers running inside the cluster -# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node -# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program -# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data -# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos - -# Options read in YARN client/cluster mode -# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf) -# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files -# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN -# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). -# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) -# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 
1000M, 2G) (Default: 1G) - -# Options for the daemons used in the standalone deploy mode -# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname -# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master -# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") -# - SPARK_WORKER_CORES, to set the number of cores to use on this machine -# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) -# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker -# - SPARK_WORKER_DIR, to set the working directory of worker processes -# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") -# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). -# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") -# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") -# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") -# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons -# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers - -# Options for launcher -# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y") - -# Generic options for the daemons used in the standalone deploy mode -# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) -# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) -# - SPARK_LOG_MAX_FILES Max log files of Spark daemons can rotate to. Default is 5. -# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) -# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) -# - SPARK_NICENESS The scheduling priority for daemons. 
(Default: 0) -# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file. -# Options for native BLAS, like Intel MKL, OpenBLAS, and so on. -# You might get better performance to enable these options if using native BLAS (see SPARK-21305). -# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL -# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS diff --git a/spark_conf/workers.template b/spark_conf/workers.template deleted file mode 100644 index be42a63..0000000 --- a/spark_conf/workers.template +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# A Spark Worker will be started on each of the machines listed below. -localhost \ No newline at end of file