Big data configuration
This commit is contained in:
parent
43f698a329
commit
72df315b4d
10 changed files with 634 additions and 2 deletions
138
nixos/thor.nix
138
nixos/thor.nix
|
|
@ -3,6 +3,46 @@
|
||||||
# and in the NixOS manual (accessible by running ‘nixos-help’).
|
# and in the NixOS manual (accessible by running ‘nixos-help’).
|
||||||
|
|
||||||
{ config, pkgs, lib, ... }:
|
{ config, pkgs, lib, ... }:
|
||||||
|
|
||||||
|
let sparkConfDir = pkgs.stdenv.mkDerivation {
|
||||||
|
name = "spark-config";
|
||||||
|
dontUnpack = true;
|
||||||
|
installPhase = ''
|
||||||
|
# source standard environment
|
||||||
|
. $stdenv/setup
|
||||||
|
|
||||||
|
# shorthands
|
||||||
|
base_conf=${pkgs.spark}/lib/${pkgs.spark.untarDir}/conf/
|
||||||
|
|
||||||
|
# create output dirs for new derivation
|
||||||
|
mkdir -p $out/
|
||||||
|
|
||||||
|
# link the unchanged *.template files from the stock Spark conf directory
|
||||||
|
for f in $base_conf/*.template ; do
|
||||||
|
ln -sf $f $out/
|
||||||
|
done
|
||||||
|
|
||||||
|
# change selected files
|
||||||
|
cp $out/log4j.properties{.template,}
|
||||||
|
|
||||||
|
cat > $out/spark-env.sh <<- STOP
|
||||||
|
export JAVA_HOME="${pkgs.jdk8}"
|
||||||
|
export SPARK_HOME="${pkgs.spark}/lib/${pkgs.spark.untarDir}"
|
||||||
|
export SPARK_DIST_CLASSPATH=$(${pkgs.hadoop}/bin/hadoop classpath)
|
||||||
|
export PYSPARK_PYTHON="${pkgs.python3Packages.python}/bin/${pkgs.python3Packages.python.executable}"
|
||||||
|
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
|
||||||
|
export SPARKR_R_SHELL="${pkgs.R}/bin/R"
|
||||||
|
export PATH="\$PATH:${pkgs.R}/bin"
|
||||||
|
STOP
|
||||||
|
|
||||||
|
cat > $out/spark-defaults.conf <<- STOP
|
||||||
|
spark.eventLog.enabled true
|
||||||
|
spark.eventLog.dir hdfs://localhost:/logs/spark
|
||||||
|
spark.history.fs.logDirectory hdfs://localhost:/logs/spark
|
||||||
|
STOP
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
in
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
<nixos-hardware/common/cpu/amd>
|
<nixos-hardware/common/cpu/amd>
|
||||||
|
|
@ -102,7 +142,7 @@
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
clamav = { daemon.enable = true; updater.enable = true; };
|
clamav = { daemon.enable = true; updater.enable = true; };
|
||||||
dbus.packages = with pkgs; [ gnome.dconf ];
|
dbus.packages = with pkgs; [ pkgs.dconf ];
|
||||||
gnome.gnome-keyring.enable = true;
|
gnome.gnome-keyring.enable = true;
|
||||||
gvfs = { enable = true; package = pkgs.gnome3.gvfs; };
|
gvfs = { enable = true; package = pkgs.gnome3.gvfs; };
|
||||||
fwupd.enable = true;
|
fwupd.enable = true;
|
||||||
|
|
@ -154,11 +194,104 @@
|
||||||
};
|
};
|
||||||
# gnome.gnome-remote-desktop.enable = true;
|
# gnome.gnome-remote-desktop.enable = true;
|
||||||
zerotierone = { enable = true; joinNetworks = [ "8056c2e21cf9c753" ]; };
|
zerotierone = { enable = true; joinNetworks = [ "8056c2e21cf9c753" ]; };
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
spark = {
|
||||||
|
master = {
|
||||||
|
enable = true;
|
||||||
|
restartIfChanged = true;
|
||||||
|
};
|
||||||
|
worker = {
|
||||||
|
enable = true;
|
||||||
|
restartIfChanged = true;
|
||||||
|
};
|
||||||
|
confDir = sparkConfDir;
|
||||||
|
};
|
||||||
|
|
||||||
|
hadoop = {
|
||||||
|
coreSite = {
|
||||||
|
"fs.defaultFS" = "hdfs://localhost:8020";
|
||||||
|
};
|
||||||
|
hdfsSite = {
|
||||||
|
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
|
||||||
|
"dfs.permissions" = "false";
|
||||||
|
};
|
||||||
|
|
||||||
|
hdfs = {
|
||||||
|
namenode = {
|
||||||
|
enable = true;
|
||||||
|
formatOnInit = true;
|
||||||
|
restartIfChanged = true;
|
||||||
|
};
|
||||||
|
datanode = {
|
||||||
|
enable = true;
|
||||||
|
restartIfChanged = true;
|
||||||
|
};
|
||||||
|
journalnode = {
|
||||||
|
enable = true;
|
||||||
|
restartIfChanged = true;
|
||||||
|
};
|
||||||
|
zkfc = {
|
||||||
|
enable = true;
|
||||||
|
restartIfChanged = true;
|
||||||
|
};
|
||||||
|
httpfs = {
|
||||||
|
enable = true;
|
||||||
|
restartIfChanged = true;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
yarn = {
|
||||||
|
resourcemanager.enable = true;
|
||||||
|
nodemanager.enable = true;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
ethminer = {
|
||||||
|
enable = false;
|
||||||
|
wallet = "0x73b788882e1C182123333f42FFf275B7dd7f51bb";
|
||||||
|
toolkit = "opencl";
|
||||||
|
rig = "thor";
|
||||||
|
pool = "eth-eu1.nanopool.org";
|
||||||
|
stratumPort = 9999;
|
||||||
|
|
||||||
|
registerMail = "";
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
# teamviewer.enable = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.spark-history = {
|
||||||
|
path = with pkgs; [ procps openssh nettools ];
|
||||||
|
description = "spark history service.";
|
||||||
|
after = [ "network.target" ];
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
restartIfChanged = true;
|
||||||
|
environment = {
|
||||||
|
SPARK_CONF_DIR = sparkConfDir;
|
||||||
|
SPARK_LOG_DIR = "/var/log/spark";
|
||||||
|
};
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "forking";
|
||||||
|
User = "spark";
|
||||||
|
Group = "spark";
|
||||||
|
WorkingDirectory = "${pkgs.spark}/lib/${pkgs.spark.untarDir}";
|
||||||
|
ExecStart = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/start-history-server.sh";
|
||||||
|
ExecStop = "${pkgs.spark}/lib/${pkgs.spark.untarDir}/sbin/stop-history-server.sh";
|
||||||
|
TimeoutSec = 300;
|
||||||
|
StartLimitBurst = 10;
|
||||||
|
Restart = "always";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
services.teamviewer.enable = true;
|
services.teamviewer.enable = true;
|
||||||
|
|
||||||
security = {
|
security = {
|
||||||
|
pam.services."kde" = {
|
||||||
|
enableKwallet = true;
|
||||||
|
};
|
||||||
rtkit.enable = true;
|
rtkit.enable = true;
|
||||||
sudo.extraConfig = ''
|
sudo.extraConfig = ''
|
||||||
Defaults pwfeedback
|
Defaults pwfeedback
|
||||||
|
|
@ -187,7 +320,8 @@
|
||||||
allowUnfree = true;
|
allowUnfree = true;
|
||||||
packageOverrides = pkgs: {
|
packageOverrides = pkgs: {
|
||||||
steam = pkgs.steam.override {
|
steam = pkgs.steam.override {
|
||||||
extraPkgs = pkgs: with pkgs; [ icu ];
|
extraPkgs = pkgs: with pkgs; [ ];
|
||||||
|
extraLibraries = pkgs: with pkgs; [ fontconfig.lib icu freetype ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
# cudaSupport = true;
|
# cudaSupport = true;
|
||||||
|
|
|
||||||
31
spark_conf/fairscheduler.xml.template
Normal file
31
spark_conf/fairscheduler.xml.template
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<allocations>
|
||||||
|
<pool name="production">
|
||||||
|
<schedulingMode>FAIR</schedulingMode>
|
||||||
|
<weight>1</weight>
|
||||||
|
<minShare>2</minShare>
|
||||||
|
</pool>
|
||||||
|
<pool name="test">
|
||||||
|
<schedulingMode>FIFO</schedulingMode>
|
||||||
|
<weight>2</weight>
|
||||||
|
<minShare>3</minShare>
|
||||||
|
</pool>
|
||||||
|
</allocations>
|
||||||
46
spark_conf/log4j.properties
Normal file
46
spark_conf/log4j.properties
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Set everything to be logged to the console
|
||||||
|
log4j.rootCategory=INFO, console
|
||||||
|
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||||
|
log4j.appender.console.target=System.err
|
||||||
|
log4j.appender.console.layout=org.apache.log4j.PatternLayout
|
||||||
|
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
|
||||||
|
|
||||||
|
# Set the default spark-shell log level to WARN. When running the spark-shell, the
|
||||||
|
# log level for this class is used to overwrite the root logger's log level, so that
|
||||||
|
# the user can have different defaults for the shell and regular Spark apps.
|
||||||
|
log4j.logger.org.apache.spark.repl.Main=WARN
|
||||||
|
|
||||||
|
# Settings to quiet third party logs that are too verbose
|
||||||
|
log4j.logger.org.sparkproject.jetty=WARN
|
||||||
|
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
|
||||||
|
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
|
||||||
|
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
|
||||||
|
log4j.logger.org.apache.parquet=ERROR
|
||||||
|
log4j.logger.parquet=ERROR
|
||||||
|
|
||||||
|
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
|
||||||
|
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
|
||||||
|
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
|
||||||
|
|
||||||
|
# For deploying Spark ThriftServer
|
||||||
|
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
|
||||||
|
log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter
|
||||||
|
log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message
|
||||||
|
log4j.appender.console.filter.1.AcceptOnMatch=false
|
||||||
46
spark_conf/log4j.properties.template
Normal file
46
spark_conf/log4j.properties.template
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Set everything to be logged to the console
|
||||||
|
log4j.rootCategory=INFO, console
|
||||||
|
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||||
|
log4j.appender.console.target=System.err
|
||||||
|
log4j.appender.console.layout=org.apache.log4j.PatternLayout
|
||||||
|
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
|
||||||
|
|
||||||
|
# Set the default spark-shell log level to WARN. When running the spark-shell, the
|
||||||
|
# log level for this class is used to overwrite the root logger's log level, so that
|
||||||
|
# the user can have different defaults for the shell and regular Spark apps.
|
||||||
|
log4j.logger.org.apache.spark.repl.Main=WARN
|
||||||
|
|
||||||
|
# Settings to quiet third party logs that are too verbose
|
||||||
|
log4j.logger.org.sparkproject.jetty=WARN
|
||||||
|
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
|
||||||
|
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
|
||||||
|
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
|
||||||
|
log4j.logger.org.apache.parquet=ERROR
|
||||||
|
log4j.logger.parquet=ERROR
|
||||||
|
|
||||||
|
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
|
||||||
|
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
|
||||||
|
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
|
||||||
|
|
||||||
|
# For deploying Spark ThriftServer
|
||||||
|
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
|
||||||
|
log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter
|
||||||
|
log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message
|
||||||
|
log4j.appender.console.filter.1.AcceptOnMatch=false
|
||||||
210
spark_conf/metrics.properties.template
Normal file
210
spark_conf/metrics.properties.template
Normal file
|
|
@ -0,0 +1,210 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# syntax: [instance].sink|source.[name].[options]=[value]
|
||||||
|
|
||||||
|
# This file configures Spark's internal metrics system. The metrics system is
|
||||||
|
# divided into instances which correspond to internal components.
|
||||||
|
# Each instance can be configured to report its metrics to one or more sinks.
|
||||||
|
# Accepted values for [instance] are "master", "worker", "executor", "driver",
|
||||||
|
# and "applications". A wildcard "*" can be used as an instance name, in
|
||||||
|
# which case all instances will inherit the supplied property.
|
||||||
|
#
|
||||||
|
# Within an instance, a "source" specifies a particular set of grouped metrics.
|
||||||
|
# there are two kinds of sources:
|
||||||
|
# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
|
||||||
|
# collect a Spark component's internal state. Each instance is paired with a
|
||||||
|
# Spark source that is added automatically.
|
||||||
|
# 2. Common sources, like JvmSource, which will collect low level state.
|
||||||
|
# These can be added through configuration options and are then loaded
|
||||||
|
# using reflection.
|
||||||
|
#
|
||||||
|
# A "sink" specifies where metrics are delivered to. Each instance can be
|
||||||
|
# assigned one or more sinks.
|
||||||
|
#
|
||||||
|
# The sink|source field specifies whether the property relates to a sink or
|
||||||
|
# source.
|
||||||
|
#
|
||||||
|
# The [name] field specifies the name of source or sink.
|
||||||
|
#
|
||||||
|
# The [options] field is the specific property of this source or sink. The
|
||||||
|
# source or sink is responsible for parsing this property.
|
||||||
|
#
|
||||||
|
# Notes:
|
||||||
|
# 1. To add a new sink, set the "class" option to a fully qualified class
|
||||||
|
# name (see examples below).
|
||||||
|
# 2. Some sinks involve a polling period. The minimum allowed polling period
|
||||||
|
# is 1 second.
|
||||||
|
# 3. Wildcard properties can be overridden by more specific properties.
|
||||||
|
# For example, master.sink.console.period takes precedence over
|
||||||
|
# *.sink.console.period.
|
||||||
|
# 4. A metrics specific configuration
|
||||||
|
# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
|
||||||
|
# added to Java properties using -Dspark.metrics.conf=xxx if you want to
|
||||||
|
# customize metrics system. You can also put the file in ${SPARK_HOME}/conf
|
||||||
|
# and it will be loaded automatically.
|
||||||
|
# 5. The MetricsServlet sink is added by default as a sink in the master,
|
||||||
|
# worker and driver, and you can send HTTP requests to the "/metrics/json"
|
||||||
|
# endpoint to get a snapshot of all the registered metrics in JSON format.
|
||||||
|
# For master, requests to the "/metrics/master/json" and
|
||||||
|
# "/metrics/applications/json" endpoints can be sent separately to get
|
||||||
|
# metrics snapshots of the master instance and applications. This
|
||||||
|
# MetricsServlet does not have to be configured.
|
||||||
|
# 6. The metrics system can also be configured using Spark configuration
|
||||||
|
# parameters. The relevant parameter names are formed by adding the
|
||||||
|
# prefix "spark.metrics.conf." to the configuration entries detailed in
|
||||||
|
# this file (see examples below).
|
||||||
|
|
||||||
|
## List of available common sources and their properties.
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.source.JvmSource
|
||||||
|
# Note: Currently, JvmSource is the only available common source.
|
||||||
|
# It can be added to an instance by setting the "class" option to its
|
||||||
|
# fully qualified class name (see examples below).
|
||||||
|
|
||||||
|
## List of available sinks and their properties.
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.ConsoleSink
|
||||||
|
# Name: Default: Description:
|
||||||
|
# period 10 Poll period
|
||||||
|
# unit seconds Unit of the poll period
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.CsvSink
|
||||||
|
# Name: Default: Description:
|
||||||
|
# period 10 Poll period
|
||||||
|
# unit seconds Unit of the poll period
|
||||||
|
# directory /tmp Where to store CSV files
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.GangliaSink
|
||||||
|
# Name: Default: Description:
|
||||||
|
# host NONE Hostname or multicast group of the Ganglia server,
|
||||||
|
# must be set
|
||||||
|
# port NONE Port of the Ganglia server(s), must be set
|
||||||
|
# period 10 Poll period
|
||||||
|
# unit seconds Unit of the poll period
|
||||||
|
# ttl 1 TTL of messages sent by Ganglia
|
||||||
|
# dmax 0 Lifetime in seconds of metrics (0 never expired)
|
||||||
|
# mode multicast Ganglia network mode ('unicast' or 'multicast')
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.JmxSink
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.MetricsServlet
|
||||||
|
# Name: Default: Description:
|
||||||
|
# path VARIES* Path prefix from the web server root
|
||||||
|
# sample false Whether to show entire set of samples for histograms
|
||||||
|
# ('false' or 'true')
|
||||||
|
#
|
||||||
|
# * Default path is /metrics/json for all instances except the master. The
|
||||||
|
# master has two paths:
|
||||||
|
# /metrics/applications/json # App information
|
||||||
|
# /metrics/master/json # Master information
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.PrometheusServlet
|
||||||
|
# Name: Default: Description:
|
||||||
|
# path VARIES* Path prefix from the web server root
|
||||||
|
#
|
||||||
|
# * Default path is /metrics/prometheus for all instances except the master. The
|
||||||
|
# master has two paths:
|
||||||
|
# /metrics/applications/prometheus # App information
|
||||||
|
# /metrics/master/prometheus # Master information
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.GraphiteSink
|
||||||
|
# Name: Default: Description:
|
||||||
|
# host NONE Hostname of the Graphite server, must be set
|
||||||
|
# port NONE Port of the Graphite server, must be set
|
||||||
|
# period 10 Poll period
|
||||||
|
# unit seconds Unit of the poll period
|
||||||
|
# prefix EMPTY STRING Prefix to prepend to every metric's name
|
||||||
|
# protocol tcp Protocol ("tcp" or "udp") to use
|
||||||
|
# regex NONE Optional filter to send only metrics matching this regex string
|
||||||
|
|
||||||
|
# org.apache.spark.metrics.sink.StatsdSink
|
||||||
|
# Name: Default: Description:
|
||||||
|
# host 127.0.0.1 Hostname or IP of StatsD server
|
||||||
|
# port 8125 Port of StatsD server
|
||||||
|
# period 10 Poll period
|
||||||
|
# unit seconds Units of poll period
|
||||||
|
# prefix EMPTY STRING Prefix to prepend to metric name
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
# Enable JmxSink for all instances by class name
|
||||||
|
#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
|
||||||
|
|
||||||
|
# Enable ConsoleSink for all instances by class name
|
||||||
|
#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
|
||||||
|
|
||||||
|
# Enable StatsdSink for all instances by class name
|
||||||
|
#*.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink
|
||||||
|
#*.sink.statsd.prefix=spark
|
||||||
|
|
||||||
|
# Polling period for the ConsoleSink
|
||||||
|
#*.sink.console.period=10
|
||||||
|
# Unit of the polling period for the ConsoleSink
|
||||||
|
#*.sink.console.unit=seconds
|
||||||
|
|
||||||
|
# Polling period for the ConsoleSink specific for the master instance
|
||||||
|
#master.sink.console.period=15
|
||||||
|
# Unit of the polling period for the ConsoleSink specific for the master
|
||||||
|
# instance
|
||||||
|
#master.sink.console.unit=seconds
|
||||||
|
|
||||||
|
# Enable CsvSink for all instances by class name
|
||||||
|
#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink
|
||||||
|
|
||||||
|
# Polling period for the CsvSink
|
||||||
|
#*.sink.csv.period=1
|
||||||
|
# Unit of the polling period for the CsvSink
|
||||||
|
#*.sink.csv.unit=minutes
|
||||||
|
|
||||||
|
# Output directory where the CsvSink stores its CSV files
|
||||||
|
#*.sink.csv.directory=/tmp/
|
||||||
|
|
||||||
|
# Polling period for the CsvSink specific for the worker instance
|
||||||
|
#worker.sink.csv.period=10
|
||||||
|
# Unit of the polling period for the CsvSink specific for the worker instance
|
||||||
|
#worker.sink.csv.unit=minutes
|
||||||
|
|
||||||
|
# Enable Slf4jSink for all instances by class name
|
||||||
|
#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
|
||||||
|
|
||||||
|
# Polling period for the Slf4jSink
|
||||||
|
#*.sink.slf4j.period=1
|
||||||
|
# Unit of the polling period for the Slf4jSink
|
||||||
|
#*.sink.slf4j.unit=minutes
|
||||||
|
|
||||||
|
# Example configuration for Graphite sink
|
||||||
|
#*.sink.graphite.class=org.apache.spark.metrics.sink.GraphiteSink
|
||||||
|
#*.sink.graphite.host=<graphiteEndPoint_hostName>
|
||||||
|
#*.sink.graphite.port=<listening_port>
|
||||||
|
#*.sink.graphite.period=10
|
||||||
|
#*.sink.graphite.unit=seconds
|
||||||
|
#*.sink.graphite.prefix=<optional_value>
|
||||||
|
|
||||||
|
# Enable JvmSource for instance master, worker, driver and executor
|
||||||
|
#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
|
||||||
|
|
||||||
|
#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
|
||||||
|
|
||||||
|
#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
|
||||||
|
|
||||||
|
#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
|
||||||
|
|
||||||
|
# Example configuration for PrometheusServlet
|
||||||
|
#*.sink.prometheusServlet.class=org.apache.spark.metrics.sink.PrometheusServlet
|
||||||
|
#*.sink.prometheusServlet.path=/metrics/prometheus
|
||||||
|
#master.sink.prometheusServlet.path=/metrics/master/prometheus
|
||||||
|
#applications.sink.prometheusServlet.path=/metrics/applications/prometheus
|
||||||
39
spark_conf/spark-defaults.conf
Normal file
39
spark_conf/spark-defaults.conf
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Default system properties included when running spark-submit.
|
||||||
|
# This is useful for setting default environmental settings.
|
||||||
|
|
||||||
|
# Example:
|
||||||
|
# spark.master spark://master:7077
|
||||||
|
# spark.eventLog.enabled true
|
||||||
|
# spark.eventLog.dir hdfs://namenode:8021/directory
|
||||||
|
# spark.serializer org.apache.spark.serializer.KryoSerializer
|
||||||
|
# spark.driver.memory 5g
|
||||||
|
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
|
||||||
|
|
||||||
|
# spark.io.compression.codec lzf
|
||||||
|
# spark.io.compression.codec org.apache.spark.io.SnappyCompressionCodec
|
||||||
|
# spark.eventLog.compress false
|
||||||
|
|
||||||
|
spark.eventLog.enabled true
|
||||||
|
spark.eventLog.dir hdfs://localhost:/logs/spark
|
||||||
|
spark.history.fs.logDirectory hdfs://localhost:/logs/spark
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
27
spark_conf/spark-defaults.conf.template
Normal file
27
spark_conf/spark-defaults.conf.template
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Default system properties included when running spark-submit.
|
||||||
|
# This is useful for setting default environmental settings.
|
||||||
|
|
||||||
|
# Example:
|
||||||
|
# spark.master spark://master:7077
|
||||||
|
# spark.eventLog.enabled true
|
||||||
|
# spark.eventLog.dir hdfs://namenode:8021/directory
|
||||||
|
# spark.serializer org.apache.spark.serializer.KryoSerializer
|
||||||
|
# spark.driver.memory 5g
|
||||||
|
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
|
||||||
7
spark_conf/spark-env.sh
Normal file
7
spark_conf/spark-env.sh
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
export JAVA_HOME="/nix/store/d0akdmr675jrlabv7n8syg8yrg1zlyxz-openjdk-8u272-b10"
|
||||||
|
export SPARK_HOME="/nix/store/zhj5q1pi0bs2lpc0lbkw8qkg03ywx9b8-spark-3.1.2/lib/spark-3.1.2"
|
||||||
|
export SPARK_DIST_CLASSPATH=/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/common/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/common/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/hdfs/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/mapreduce/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn/lib/*:/nix/store/2b608nzvrsw1b8jd14vc77dc5j32q498-hadoop-3.3.1/lib/hadoop-3.3.1/share/hadoop/yarn/*
|
||||||
|
export PYSPARK_PYTHON="/nix/store/2c9w4p2x6x0l64fdvcmc11app7x4xran-python3-3.9.6/bin/python3.9"
|
||||||
|
export PYTHONPATH="$PYTHONPATH:/nix/store/2c9w4p2x6x0l64fdvcmc11app7x4xran-python3-3.9.6/lib/python3.9/site-packages"
|
||||||
|
export SPARKR_R_SHELL="/nix/store/h1s3y5jjrwdm5gd2qyxp2ldsnykippcb-R-4.1.2/bin/R"
|
||||||
|
export PATH="$PATH:/nix/store/h1s3y5jjrwdm5gd2qyxp2ldsnykippcb-R-4.1.2/bin"
|
||||||
73
spark_conf/spark-env.sh.template
Executable file
73
spark_conf/spark-env.sh.template
Executable file
|
|
@ -0,0 +1,73 @@
|
||||||
|
#!/nix/store/vfai0jim0db67nk9rd7ziq29jxb5n79n-bash-5.1-p8/bin/bash
|
||||||
|
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# This file is sourced when running various Spark programs.
|
||||||
|
# Copy it as spark-env.sh and edit that to configure Spark for your site.
|
||||||
|
|
||||||
|
# Options read when launching programs locally with
|
||||||
|
# ./bin/run-example or ./bin/spark-submit
|
||||||
|
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
|
||||||
|
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
|
||||||
|
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
|
||||||
|
|
||||||
|
# Options read by executors and drivers running inside the cluster
|
||||||
|
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
|
||||||
|
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
|
||||||
|
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
|
||||||
|
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
|
||||||
|
|
||||||
|
# Options read in YARN client/cluster mode
|
||||||
|
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
|
||||||
|
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
|
||||||
|
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
|
||||||
|
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
|
||||||
|
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
|
||||||
|
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
|
||||||
|
|
||||||
|
# Options for the daemons used in the standalone deploy mode
|
||||||
|
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
|
||||||
|
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
|
||||||
|
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
|
||||||
|
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
|
||||||
|
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
|
||||||
|
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
|
||||||
|
# - SPARK_WORKER_DIR, to set the working directory of worker processes
|
||||||
|
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
|
||||||
|
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
|
||||||
|
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
|
||||||
|
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
|
||||||
|
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
|
||||||
|
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
|
||||||
|
# - SPARK_PUBLIC_DNS, to set the public DNS name of the master or workers
|
||||||
|
|
||||||
|
# Options for launcher
|
||||||
|
# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")
|
||||||
|
|
||||||
|
# Generic options for the daemons used in the standalone deploy mode
|
||||||
|
# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf)
|
||||||
|
# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs)
|
||||||
|
# - SPARK_LOG_MAX_FILES Maximum number of log files that Spark daemons rotate through. (Default: 5)
|
||||||
|
# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp)
|
||||||
|
# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER)
|
||||||
|
# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0)
|
||||||
|
# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file.
|
||||||
|
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
|
||||||
|
# Enabling these options may improve performance when using native BLAS (see SPARK-21305).
|
||||||
|
# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL
|
||||||
|
# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS
|
||||||
19
spark_conf/workers.template
Normal file
19
spark_conf/workers.template
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# A Spark Worker will be started on each of the machines listed below.
|
||||||
|
localhost
|
||||||
Loading…
Add table
Add a link
Reference in a new issue