visit
RUN curl -L //dlcdn.apache.org/spark/spark-3.5.1/spark-3.5.1-bin-without-hadoop.tgz -o spark-3.5.1-bin-without-hadoop.tgz \
&& tar -xvzf spark-3.5.1-bin-without-hadoop.tgz \
&& mv spark-3.5.1-bin-without-hadoop /opt/spark \
&& rm spark-3.5.1-bin-without-hadoop.tgz
For building on Go, the parameter GOEXPERIMENT=boringcrypto
is used
For running spark-submit, the java parameter for Bouncy Castle is used Djavax.net.ssl.trustStorePassword=password
hadoop-client-runtime
hadoop-client-api
slf4j-api
entrypoint.sh
is used from the official Kubeflow repository //github.com/kubeflow/spark-operator/blob/master/entrypoint.sh
ARG SPARK_IMAGE=spark-3.5.1-bin-without-hadoop
ARG GOLANG_IMAGE=golang-1.21
ARG SPARK_OPERATOR_VERSION=1.3.1
ARG HADOOP_VERSION_DEFAULT=3.4.0
ARG HADOOP_TMP_HOME="/opt/hadoop"
ARG TARGETARCH=amd64
# Prepare spark-operator build
FROM ${GOLANG_IMAGE} as builder
WORKDIR /app/spark-operator
ARG SPARK_OPERATOR_VERSION
RUN curl -Ls //github.com/kubeflow/spark-operator/archive/refs/tags/spark-operator-chart-${SPARK_OPERATOR_VERSION}.tar.gz | tar -xz --strip-components 1 -C /app/spark-operator
RUN GOTOOLCHAIN=go1.22.3 go mod download
# Build
ARG TARGETARCH
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} GO111MODULE=on GOTOOLCHAIN=go1.22.3 GOEXPERIMENT=boringcrypto go build -a -o /app/spark-operator/spark-operator main.go
#Install Hadoop jars
ARG HADOOP_VERSION_DEFAULT
ARG HADOOP_TMP_HOME
RUN mkdir -p ${HADOOP_TMP_HOME}
RUN curl -Ls //archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION_DEFAULT}/hadoop-${HADOOP_VERSION_DEFAULT}.tar.gz | tar -xz --strip-components 1 -C ${HADOOP_TMP_HOME}
# Prepare spark-operator image
FROM ${ECR_URL}:${SPARK_IMAGE}
WORKDIR /opt/spark-operator
USER root
ENV PATH $JAVA_HOME/bin:$PATH
ENV SPARK_HOME="/opt/spark"
ENV JAVA_HOME="/opt/jdk-11.0.21"
ENV SPARK_SUBMIT_OPTS="${SPARK_SUBMIT_OPTS} -Djavax.net.ssl.trustStorePassword=password"
ENV PATH=${PATH}:${SPARK_HOME}/bin:${JAVA_HOME}/bin:
RUN yum update -y && \
yum install --setopt=tsflags=nodocs -y openssl && \
yum clean all
ARG HADOOP_TMP_HOME
COPY --from=builder ${HADOOP_TMP_HOME}/share/hadoop/client/hadoop-client-runtime-*.jar ${HADOOP_TMP_HOME}/share/hadoop/client/hadoop-client-api-*.jar ${HADOOP_TMP_HOME}/share/hadoop/common/lib/slf4j-api-*.jar /opt/spark/jars/
COPY --from=builder /app/spark-operator/spark-operator /opt/spark-operator/
COPY --from=builder /app/spark-operator/hack/gencerts.sh /usr/bin/
COPY entrypoint.sh /opt/spark-operator/
RUN chmod a+x /opt/spark-operator/entrypoint.sh
ENTRYPOINT ["/opt/spark-operator/entrypoint.sh"]