diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7761d46..42ee077 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -50,6 +50,9 @@ jobs:
- name: zipkin-dependencies-elasticsearch-v8
module: zipkin-dependencies-elasticsearch
groups: docker,elasticsearch8
+ - name: zipkin-dependencies-opensearch-v2
+ module: zipkin-dependencies-opensearch
+ groups: docker,opensearch2
- name: zipkin-dependencies-mysql
module: zipkin-dependencies-mysql
groups: docker
diff --git a/README.md b/README.md
index e296e55..e7b3b33 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ are supported, including Cassandra, MySQL and Elasticsearch.
* `STORAGE_TYPE=cassandra3` : requires Cassandra 3.11.3+; tested against the latest patch of 4.0
* `STORAGE_TYPE=mysql` : requires MySQL 5.6+; tested against MySQL 10.11
-* `STORAGE_TYPE=elasticsearch` : requires Elasticsearch 7+; tested against last minor release of 7.x and 8.x
+* `STORAGE_TYPE=elasticsearch` : requires Elasticsearch 7+ or OpenSearch 2.x; tested against last minor release of Elasticsearch 7.x and 8.x, OpenSearch 2.x
## Quick-start
@@ -92,20 +92,20 @@ $ STORAGE_TYPE=mysql MYSQL_USER=root java -jar zipkin-dependencies.jar
```
### Elasticsearch Storage
-Elasticsearch is used when `STORAGE_TYPE=elasticsearch`. The schema is compatible with Zipkin's [Elasticsearch storage component](https://github.com/openzipkin/zipkin/tree/master/zipkin-storage/elasticsearch).
+Elasticsearch/OpenSearch is used when `STORAGE_TYPE=elasticsearch`. The schema is compatible with Zipkin's [Elasticsearch storage component](https://github.com/openzipkin/zipkin/tree/master/zipkin-storage/elasticsearch).
* `ES_INDEX`: The index prefix to use when generating daily index names. Defaults to zipkin.
* `ES_DATE_SEPARATOR`: The separator used when generating dates in index.
Defaults to '-' so the queried index look like zipkin-yyyy-DD-mm
Could for example be changed to '.' to give zipkin-yyyy.MM.dd
- * `ES_HOSTS`: A comma separated list of elasticsearch hosts advertising http. Defaults to
+ * `ES_HOSTS`: A comma separated list of Elasticsearch / OpenSearch hosts advertising http. Defaults to
localhost. Add port section if not listening on port 9200. Only one of these hosts
needs to be available to fetch the remaining nodes in the cluster. It is
recommended to set this to all the master nodes of the cluster. Use url format for
SSL. For example, "https://yourhost:8888"
* `ES_NODES_WAN_ONLY`: Set to true to only use the values set in ES_HOSTS, for example if your
- elasticsearch cluster is in Docker. Defaults to false
- * `ES_USERNAME` and `ES_PASSWORD`: Elasticsearch basic authentication. Use when X-Pack security
+ Elasticsearch / OpenSearch cluster is in Docker. Defaults to false
+ * `ES_USERNAME` and `ES_PASSWORD`: Elasticsearch / OpenSearch basic authentication. Use when X-Pack security
(formerly Shield) is in place. By default no username or
password is provided to elasticsearch.
diff --git a/docker/examples/README.md b/docker/examples/README.md
index 80e5d17..a63ac9d 100644
--- a/docker/examples/README.md
+++ b/docker/examples/README.md
@@ -15,6 +15,13 @@ $ STORAGE_TYPE=elasticsearch
$ docker-compose -f docker-compose.yml -f docker-compose-${STORAGE_TYPE}.yml up
```
+The `elasticsearch` storage type is also compatible with OpenSearch,
+you can start the example setup like this:
+
+```
+$ docker-compose -f docker-compose.yml -f docker-compose-opensearch.yml up
+```
+
This starts zipkin, the corresponding storage and makes an example request.
After that, it runs the dependencies job on-demand.
diff --git a/docker/examples/docker-compose-opensearch.yml b/docker/examples/docker-compose-opensearch.yml
new file mode 100644
index 0000000..67563f1
--- /dev/null
+++ b/docker/examples/docker-compose-opensearch.yml
@@ -0,0 +1,40 @@
+#
+# Copyright The OpenZipkin Authors
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# This file uses the version 2 docker-compose file format, described here:
+# https://docs.docker.com/compose/compose-file/#version-2
+#
+# It extends the default configuration from docker-compose.yml to run the
+# zipkin-opensearch2 container instead of the zipkin-mysql container.
+
+version: '2.4'
+
+services:
+ storage:
+ image: ghcr.io/openzipkin/zipkin-opensearch2:${TAG:-latest}
+ container_name: opensearch
+ # Uncomment to expose the storage port for testing
+ # ports:
+ # - 9200:9200
+
+ # Use OpenSearch instead of in-memory storage
+ zipkin:
+ extends:
+ file: docker-compose.yml
+ service: zipkin
+ environment:
+ - STORAGE_TYPE=elasticsearch
+ # Point the zipkin at the storage backend
+ - ES_HOSTS=opensearch:9200
+      # Uncomment to see requests to and from OpenSearch
+ # - ES_HTTP_LOGGING=BODY
+
+ dependencies:
+ extends:
+ file: docker-compose.yml
+ service: dependencies
+ environment:
+ - STORAGE_TYPE=elasticsearch
+ - ES_HOSTS=opensearch
diff --git a/main/pom.xml b/main/pom.xml
index c61b8a6..7605326 100644
--- a/main/pom.xml
+++ b/main/pom.xml
@@ -39,6 +39,19 @@
zipkin-dependencies-elasticsearch
${project.version}
+
+
+ ${project.groupId}
+ zipkin-dependencies-opensearch
+ ${project.version}
+
+
+
+ com.linecorp.armeria
+ armeria-junit5
+ ${armeria.version}
+ test
+
diff --git a/main/src/main/java/zipkin2/dependencies/ZipkinDependenciesJob.java b/main/src/main/java/zipkin2/dependencies/ZipkinDependenciesJob.java
index 7265d1c..b1775eb 100644
--- a/main/src/main/java/zipkin2/dependencies/ZipkinDependenciesJob.java
+++ b/main/src/main/java/zipkin2/dependencies/ZipkinDependenciesJob.java
@@ -13,6 +13,7 @@
import java.util.LinkedHashMap;
import java.util.TimeZone;
import zipkin2.dependencies.elasticsearch.ElasticsearchDependenciesJob;
+import zipkin2.dependencies.opensearch.OpensearchDependenciesJob;
import zipkin2.dependencies.mysql.MySQLDependenciesJob;
public final class ZipkinDependenciesJob {
@@ -61,13 +62,23 @@ public static void main(String[] args) throws UnsupportedEncodingException {
.run();
break;
case "elasticsearch":
- ElasticsearchDependenciesJob.builder()
- .logInitializer(logInitializer)
- .jars(jarPath)
- .day(day)
- .conf(sparkConf)
- .build()
- .run();
+ if (ZipkinElasticsearchStorage.flavor().equalsIgnoreCase("elasticsearch")) {
+ ElasticsearchDependenciesJob.builder()
+ .logInitializer(logInitializer)
+ .jars(jarPath)
+ .day(day)
+ .conf(sparkConf)
+ .build()
+ .run();
+ } else { // "opensearch"
+ OpensearchDependenciesJob.builder()
+ .logInitializer(logInitializer)
+ .jars(jarPath)
+ .day(day)
+ .conf(sparkConf)
+ .build()
+ .run();
+ }
break;
default:
throw new UnsupportedOperationException("Unsupported STORAGE_TYPE: " + storageType + "\n"
diff --git a/main/src/main/java/zipkin2/dependencies/ZipkinElasticsearchStorage.java b/main/src/main/java/zipkin2/dependencies/ZipkinElasticsearchStorage.java
new file mode 100644
index 0000000..b257663
--- /dev/null
+++ b/main/src/main/java/zipkin2/dependencies/ZipkinElasticsearchStorage.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright The OpenZipkin Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package zipkin2.dependencies;
+
+import java.io.IOException;
+import java.net.Authenticator;
+import java.net.PasswordAuthentication;
+import java.net.Socket;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.net.http.HttpResponse.BodyHandlers;
+import java.security.KeyManagementException;
+import java.security.NoSuchAlgorithmException;
+import java.security.SecureRandom;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.SSLEngine;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509ExtendedTrustManager;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+final class ZipkinElasticsearchStorage {
+ private static final Logger LOG = LoggerFactory.getLogger(ZipkinElasticsearchStorage.class);
+  private static final Pattern DISTRIBUTION = Pattern.compile("\"distribution\"\\s*[:]\\s*\"([^\"]+)\"");
+
+ static final String HOSTS = getEnv("ES_HOSTS", "127.0.0.1");
+ static final String USERNAME = getEnv("ES_USERNAME", null);
+ static final String PASSWORD = getEnv("ES_PASSWORD", null);
+
+ static TrustManager[] TRUST_ALL = new TrustManager [] {
+ new X509ExtendedTrustManager() {
+ @Override
+ public X509Certificate[] getAcceptedIssuers() {
+        return new X509Certificate[0]; // X509TrustManager contract: must be non-null
+ }
+
+ @Override
+ public void checkClientTrusted(X509Certificate[] certs, String authType) {
+ }
+
+ @Override
+ public void checkServerTrusted(X509Certificate[] certs, String authType) {
+ }
+
+ @Override
+ public void checkServerTrusted(X509Certificate[] chain, String authType, SSLEngine engine) throws CertificateException {
+ }
+
+ @Override
+ public void checkServerTrusted(X509Certificate[] chain, String authType, Socket socket) throws CertificateException {
+ }
+
+ @Override
+ public void checkClientTrusted(X509Certificate[] chain, String authType, SSLEngine engine) throws CertificateException {
+ }
+
+ @Override
+ public void checkClientTrusted(X509Certificate[] chain, String authType, Socket socket) throws CertificateException {
+ }
+ }
+ };
+
+ static String flavor() {
+ return flavor(HOSTS, USERNAME, PASSWORD);
+ }
+
+ static String flavor(String hosts, String username, String password) {
+ final HttpClient.Builder builder = HttpClient
+ .newBuilder()
+ .connectTimeout(Duration.ofSeconds(5));
+
+ if (username != null && password != null) {
+ builder.authenticator(new Authenticator() {
+ @Override
+ protected PasswordAuthentication getPasswordAuthentication() {
+ return new PasswordAuthentication(username, password.toCharArray());
+ }
+ });
+ }
+
+ try {
+ final SSLContext sslContext = SSLContext.getInstance("TLS");
+ sslContext.init(null, TRUST_ALL, new SecureRandom());
+
+ final HttpClient client = builder.sslContext(sslContext).build();
+ try {
+ for (String host: parseHosts(hosts)) {
+ final HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(host)).build();
+ try {
+          final HttpResponse<String> response = client.send(request, BodyHandlers.ofString());
+ final Matcher matcher = DISTRIBUTION.matcher(response.body());
+ if (matcher.find()) {
+ return matcher.group(1).toLowerCase();
+ }
+ } catch (InterruptedException | IOException ex) {
+            LOG.warn("Unable to issue HTTP GET request to '" + host + "'", ex);
+ }
+ }
+ } finally {
+ if (client instanceof AutoCloseable) {
+ try {
+ // Since JDK-21, the HttpClient is AutoCloseable
+ ((AutoCloseable) client).close();
+ } catch (Exception ex) {
+ /* Ignore */
+ }
+ }
+ }
+ } catch (final NoSuchAlgorithmException | KeyManagementException ex) {
+ LOG.warn("Unable to configure HttpClient", ex);
+ }
+
+ return "elasticsearch";
+ }
+
+ private static String getEnv(String key, String defaultValue) {
+ String result = System.getenv(key);
+ return result != null && !result.isEmpty() ? result : defaultValue;
+ }
+
+ static String[] parseHosts(String hosts) {
+ final String[] hostParts = hosts.split(",", -1);
+
+ // Detect default scheme to use if not specified
+ String defaultScheme = "http";
+ for (int i = 0; i < hostParts.length; i++) {
+ String host = hostParts[i];
+ if (host.startsWith("https")) {
+ defaultScheme = "https";
+ break;
+ }
+ }
+
+    Collection<String> list = new ArrayList<>();
+ for (int i = 0; i < hostParts.length; i++) {
+ String host = hostParts[i];
+ URI httpUri = host.startsWith("http") ? URI.create(host) : URI.create(defaultScheme + "://" + host);
+
+ int port = httpUri.getPort();
+ if (port == -1) {
+ port = 9200; /* default Elasticsearch / OpenSearch port */
+ }
+
+ list.add(httpUri.getScheme() + "://" + httpUri.getHost() + ":" + port);
+ }
+
+ return list.toArray(new String[0]);
+ }
+}
diff --git a/main/src/test/java/zipkin2/dependencies/ZipkinElasticsearchStorageTest.java b/main/src/test/java/zipkin2/dependencies/ZipkinElasticsearchStorageTest.java
new file mode 100644
index 0000000..19a75cf
--- /dev/null
+++ b/main/src/test/java/zipkin2/dependencies/ZipkinElasticsearchStorageTest.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright The OpenZipkin Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package zipkin2.dependencies;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+import com.linecorp.armeria.common.AggregatedHttpResponse;
+import com.linecorp.armeria.common.HttpStatus;
+import com.linecorp.armeria.common.MediaType;
+import com.linecorp.armeria.testing.junit5.server.mock.MockWebServerExtension;
+
+class ZipkinElasticsearchStorageTest {
+ static final AggregatedHttpResponse ELASTICSEARCH_RESPONSE = AggregatedHttpResponse.of(
+ HttpStatus.OK, MediaType.JSON_UTF_8, """
+ {
+ "name" : "zipkin-elasticsearch",
+ "cluster_name" : "docker-cluster",
+ "cluster_uuid" : "wByRPgSgTryYl0TZXW4MsA",
+ "version" : {
+ "number" : "7.0.1",
+ "build_flavor" : "default",
+ "build_type" : "tar",
+ "build_hash" : "e4efcb5",
+ "build_date" : "2019-04-29T12:56:03.145736Z",
+ "build_snapshot" : false,
+ "lucene_version" : "8.0.0",
+ "minimum_wire_compatibility_version" : "6.7.0",
+ "minimum_index_compatibility_version" : "6.0.0-beta1"
+ },
+ "tagline" : "You Know, for Search"
+ }
+ """);
+ static final AggregatedHttpResponse OPENSEARCH_RESPONSE = AggregatedHttpResponse.of(
+ HttpStatus.OK, MediaType.JSON_UTF_8, """
+ {
+ "name" : "PV-NhJd",
+ "cluster_name" : "CollectorDBCluster",
+ "cluster_uuid" : "UjZaM0fQRC6tkHINCg9y8w",
+ "version" : {
+ "distribution" : "opensearch",
+ "number" : "2.11.1",
+ "build_type" : "tar",
+ "build_hash" : "6b1986e964d440be9137eba1413015c31c5a7752",
+ "build_date" : "2023-11-29T21:43:10.135035992Z",
+ "build_snapshot" : false,
+ "lucene_version" : "9.7.0",
+ "minimum_wire_compatibility_version" : "7.10.0",
+ "minimum_index_compatibility_version" : "7.0.0"
+ },
+ "tagline" : "The OpenSearch Project: https://opensearch.org/"
+ }
+ """);
+
+ @RegisterExtension static MockWebServerExtension server = new MockWebServerExtension();
+
+ @Test void opensearch_http() throws Exception {
+ server.enqueue(OPENSEARCH_RESPONSE);
+
+ assertThat(ZipkinElasticsearchStorage.flavor(server.httpUri().toString(), null, null))
+ .isEqualTo("opensearch");
+ }
+
+ @Test void opensearch_https() throws Exception {
+ server.enqueue(OPENSEARCH_RESPONSE);
+
+ assertThat(ZipkinElasticsearchStorage.flavor(server.httpsUri().toString(), null, null))
+ .isEqualTo("opensearch");
+ }
+
+ @Test void elasticsearch_http() throws Exception {
+ server.enqueue(ELASTICSEARCH_RESPONSE);
+
+ assertThat(ZipkinElasticsearchStorage.flavor(server.httpUri().toString(), null, null))
+ .isEqualTo("elasticsearch");
+ }
+
+ @Test void elasticsearch_https() throws Exception {
+ server.enqueue(ELASTICSEARCH_RESPONSE);
+
+ assertThat(ZipkinElasticsearchStorage.flavor(server.httpsUri().toString(), null, null))
+ .isEqualTo("elasticsearch");
+ }
+}
diff --git a/opensearch/pom.xml b/opensearch/pom.xml
new file mode 100644
index 0000000..01c335a
--- /dev/null
+++ b/opensearch/pom.xml
@@ -0,0 +1,71 @@
+
+
+
+ 4.0.0
+
+
+ io.zipkin.dependencies
+ zipkin-dependencies-parent
+ 3.2.0-SNAPSHOT
+
+
+ zipkin-dependencies-opensearch
+ Zipkin Dependencies: OpenSearch
+
+
+ ${project.basedir}/..
+ 4.12.0
+
+
+
+
+ org.opensearch.client
+ opensearch-spark-30_${scala.binary.version}
+ ${opensearch-spark.version}
+
+
+
+ io.zipkin.zipkin2
+ zipkin-storage-elasticsearch
+ ${zipkin.version}
+ test
+
+
+ com.squareup.okhttp3
+ mockwebserver
+ ${okhttp.version}
+ test
+
+
+ com.squareup.okhttp3
+ okhttp-tls
+ ${okhttp.version}
+ test
+
+
+
+ org.slf4j
+ slf4j-api
+ ${slf4j.version}
+
+
+
+
+ com.linecorp.armeria
+ armeria-junit5
+ ${armeria.version}
+ test
+
+
+ org.testcontainers
+ junit-jupiter
+ ${testcontainers.version}
+ test
+
+
+
diff --git a/opensearch/src/main/java/zipkin2/dependencies/opensearch/OpensearchDependenciesJob.java b/opensearch/src/main/java/zipkin2/dependencies/opensearch/OpensearchDependenciesJob.java
new file mode 100644
index 0000000..e3854fa
--- /dev/null
+++ b/opensearch/src/main/java/zipkin2/dependencies/opensearch/OpensearchDependenciesJob.java
@@ -0,0 +1,273 @@
+/*
+ * Copyright The OpenZipkin Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package zipkin2.dependencies.opensearch;
+
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.MalformedJsonException;
+import java.io.IOException;
+import java.io.StringReader;
+import java.net.URI;
+import java.nio.charset.Charset;
+import java.text.SimpleDateFormat;
+import java.util.Collections;
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.TimeZone;
+import javax.annotation.Nullable;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.PairFunction;
+import org.opensearch.spark.rdd.api.java.JavaOpenSearchSpark;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Tuple2;
+import zipkin2.DependencyLink;
+import zipkin2.codec.SpanBytesDecoder;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_INDEX_READ_MISSING_AS_EMPTY;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_HTTP_AUTH_PASS;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_HTTP_AUTH_USER;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_KEYSTORE_LOCATION;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_KEYSTORE_PASS;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_TRUST_STORE_LOCATION;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_TRUST_STORE_PASS;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_USE_SSL;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NODES;
+import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NODES_WAN_ONLY;
+import static zipkin2.internal.DateUtil.midnightUTC;
+
+public final class OpensearchDependenciesJob {
+ static final Charset UTF_8 = Charset.forName("UTF-8");
+
+ private static final Logger log = LoggerFactory.getLogger(OpensearchDependenciesJob.class);
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ public static final class Builder {
+
+ String index = getEnv("ES_INDEX", "zipkin");
+ String hosts = getEnv("ES_HOSTS", "127.0.0.1");
+ String username = getEnv("ES_USERNAME", null);
+ String password = getEnv("ES_PASSWORD", null);
+
+    final Map<String, String> sparkProperties = new LinkedHashMap<>();
+
+ Builder() {
+ sparkProperties.put("spark.ui.enabled", "false");
+ // don't die if there are no spans
+ sparkProperties.put(OPENSEARCH_INDEX_READ_MISSING_AS_EMPTY, "true");
+ sparkProperties.put(OPENSEARCH_NODES_WAN_ONLY, getEnv("ES_NODES_WAN_ONLY", "false"));
+ sparkProperties.put(OPENSEARCH_NET_SSL_KEYSTORE_LOCATION,
+ getSystemPropertyAsFileResource("javax.net.ssl.keyStore"));
+ sparkProperties.put(OPENSEARCH_NET_SSL_KEYSTORE_PASS,
+ System.getProperty("javax.net.ssl.keyStorePassword", ""));
+ sparkProperties.put(OPENSEARCH_NET_SSL_TRUST_STORE_LOCATION,
+ getSystemPropertyAsFileResource("javax.net.ssl.trustStore"));
+ sparkProperties.put(OPENSEARCH_NET_SSL_TRUST_STORE_PASS,
+ System.getProperty("javax.net.ssl.trustStorePassword", ""));
+ }
+
+ // local[*] master lets us run & test the job locally without setting a Spark cluster
+ String sparkMaster = getEnv("SPARK_MASTER", "local[*]");
+ // needed when not in local mode
+ String[] jars;
+ Runnable logInitializer;
+
+ // By default, the job only works on traces whose first timestamp is today
+ long day = midnightUTC(System.currentTimeMillis());
+
+ /** When set, this indicates which jars to distribute to the cluster. */
+ public Builder jars(String... jars) {
+ this.jars = jars;
+ return this;
+ }
+
+ /** The index prefix to use when generating daily index names. Defaults to "zipkin" */
+ public Builder index(String index) {
+ this.index = checkNotNull(index, "index");
+ return this;
+ }
+
+ public Builder hosts(String hosts) {
+ this.hosts = checkNotNull(hosts, "hosts");
+ sparkProperties.put("opensearch.nodes.wan.only", "true");
+ return this;
+ }
+
+ /** username used for basic auth. Needed when Shield or X-Pack security is enabled */
+ public Builder username(String username) {
+ this.username = username;
+ return this;
+ }
+
+ /** password used for basic auth. Needed when Shield or X-Pack security is enabled */
+ public Builder password(String password) {
+ this.password = password;
+ return this;
+ }
+
+ /** Day (in epoch milliseconds) to process dependencies for. Defaults to today. */
+ public Builder day(long day) {
+ this.day = midnightUTC(day);
+ return this;
+ }
+
+ /** Extending more configuration of spark. */
+    public Builder conf(Map<String, String> conf) {
+ sparkProperties.putAll(conf);
+ return this;
+ }
+
+ /** Ensures that logging is set up. Particularly important when in cluster mode. */
+ public Builder logInitializer(Runnable logInitializer) {
+ this.logInitializer = checkNotNull(logInitializer, "logInitializer");
+ return this;
+ }
+
+ public OpensearchDependenciesJob build() {
+ return new OpensearchDependenciesJob(this);
+ }
+ }
+
+ private static String getSystemPropertyAsFileResource(String key) {
+ String prop = System.getProperty(key, "");
+ return prop != null && !prop.isEmpty() ? "file:" + prop : prop;
+ }
+
+ final String index;
+ final String dateStamp;
+ final SparkConf conf;
+ @Nullable final Runnable logInitializer;
+
+ OpensearchDependenciesJob(Builder builder) {
+ this.index = builder.index;
+ String dateSeparator = getEnv("ES_DATE_SEPARATOR", "-");
+ SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd".replace("-", dateSeparator));
+ df.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.dateStamp = df.format(new Date(builder.day));
+ this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName());
+ if (builder.sparkMaster.startsWith("local[")) {
+ conf.set("spark.driver.bindAddress", "127.0.0.1");
+ }
+ if (builder.jars != null) conf.setJars(builder.jars);
+ if (builder.username != null) conf.set(OPENSEARCH_NET_HTTP_AUTH_USER, builder.username);
+ if (builder.password != null) conf.set(OPENSEARCH_NET_HTTP_AUTH_PASS, builder.password);
+ conf.set(OPENSEARCH_NODES, parseHosts(builder.hosts));
+ if (builder.hosts.contains("https")) conf.set(OPENSEARCH_NET_USE_SSL, "true");
+    for (Map.Entry<String, String> entry : builder.sparkProperties.entrySet()) {
+ conf.set(entry.getKey(), entry.getValue());
+ log.debug("Spark conf properties: {}={}", entry.getKey(), entry.getValue());
+ }
+ this.logInitializer = builder.logInitializer;
+ }
+
+ public void run() {
+ String spanResource = index + "-span-" + dateStamp;
+ String dependencyLinkResource = index + "-dependency-" + dateStamp;
+ SpanBytesDecoder decoder = SpanBytesDecoder.JSON_V2;
+
+ log.info("Processing spans from {}", spanResource);
+ JavaRDD