Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#247] [spark] Bump Spark Version to 2.4.7 to support GCP dataproc-release-1.5 #250

Merged
merged 15 commits into from
Nov 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
bf48928
[#247] [spark] Bump Spark Version to 2.4.7 | Trim Logging Module
Dee-Pac Oct 30, 2020
af18961
[#247] [spark] Bump Spark Version to 2.4.7 | Deprecate kafka 0.10
Dee-Pac Oct 30, 2020
d78326b
[#247] [spark] Bump Spark Version to 2.4.7 | Change Gimel Package Ver…
Dee-Pac Oct 30, 2020
f54b1a1
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 6, 2020
295fedb
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 6, 2020
e957499
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 6, 2020
a83d31f
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 6, 2020
c4b6716
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 7, 2020
219fc00
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 8, 2020
3b25dcb
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 8, 2020
7e5f21e
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 8, 2020
51ca21b
[#247] [spark] Bump Spark Version to 2.4.7 | Remove modules [sql, too…
Dee-Pac Nov 9, 2020
a75a05e
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 9, 2020
6e3ae2f
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 9, 2020
47a79fe
[#247] [spark] Bump Spark Version to 2.4.7 | Bump Spark Version to 2.…
Dee-Pac Nov 9, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ before_install:

# Build
install:
- build/gimel -Dmaven.test.skip=true -pl gimel-dataapi/gimel-tools -am | egrep -v "Download|Copy|Including|WARNING"
- sh -x build/gimel -Dmaven.test.skip=true -pl gimel-dataapi | egrep -iv "Download|Copy|Including|WARNING"
100 changes: 96 additions & 4 deletions build/gimel
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,81 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Print the directory the build was started from (useful in CI logs).
echo "Present Working Dir"
echo "$PWD"

# Directory that holds the build helper scripts, relative to the repository root.
this_dir=$(dirname build/install_dependencies)
# Source reusable functions
source "${this_dir}/gimel_functions"

#----------------------------function will check for error code & exit if failure, else proceed further----------------------------#

# Usage  : check_error <exit_status> <stage_description>
# Example: check_error $? "mvn install"
#
# Arguments:
#   $1 - exit status of the command that just ran (normally $?)
#   $2 - text identifying the stage; it is included in the logged line
# Behaviour:
#   status > 0            : logs "Error | Stage | <text>" through write_log and
#                           exits the script with that status
#   status = 0            : logs "Success | Stage | <text>" through write_log
#   missing / non-numeric : logged as an error and the script exits with 1, so
#                           a caller that forgot to pass $? is never reported
#                           as a success

check_error() {
  local cmd_error_code=${1:-}
  local custom_message=${2:-}

  # Reject anything that is not a plain non-negative integer before it reaches
  # the numeric comparison below.
  case "${cmd_error_code}" in
    '' | *[!0-9]*)
      write_log "Error | Stage | ${custom_message} | invalid exit status '${cmd_error_code}'"
      exit 1
      ;;
  esac

  if [ "${cmd_error_code}" -gt 0 ]; then
    write_log "Error | Stage | ${custom_message}"
    exit "${cmd_error_code}"
  else
    write_log "Success | Stage | ${custom_message}"
  fi
}

#----------------------------function will check for error code & warn if failure----------------------------#

# Usage  : check_warning <exit_status> <program_exit_code> <message>
# Example: check_warning $? 10 "optional step failed"
#
# Arguments:
#   $1 - exit status of the command that just ran (normally $?)
#   $2 - code to show in the warning line
#   $3 - message to show in the warning line
# Behaviour:
#   status > 0 : logs "WARNING ! <status> <code> <message>" through write_log
#   otherwise  : prints an empty line
# The script is never terminated by this function.

check_warning() {
  local cmd_error_code=${1:-}
  local pgm_exit_code=${2:-}
  local pgm_exit_msg=${3:-}

  # A missing or non-numeric status takes the "no warning" branch; the error
  # that `[` would print for such a value is silenced.
  if [ "${cmd_error_code:-0}" -gt 0 ] 2>/dev/null; then
    write_log "WARNING ! ${cmd_error_code} ${pgm_exit_code} ${pgm_exit_msg}"
  else
    echo ""
  fi
}



#----------------------------function will write the message to Console / Log File----------------------------#

# Usage : write_log <message>
#
# Arguments:
#   $1 - the message to log
# Output:
#   one line on stdout of the form "YYYYMMDD HH:MM:SS | <message>"
# The message is printed verbatim: runs of whitespace are preserved and glob
# characters such as '*' are not expanded.

write_log() {
  local msg=${1:-}
  local to_be_logged
  to_be_logged="$(date '+%Y%m%d %H:%M:%S') | ${msg}"
  printf '%s\n' "${to_be_logged}"
}

#-----------------------------------Executes a Command--------------------------------------------------------#

# Usage  : run_cmd <command> [ignore_errors]
# Example: run_cmd "mvn -v"
#          run_cmd "rm -f some.file" ignore_errors
#
# Arguments:
#   $1 - the command line to execute, given as a single string
#   $2 - optional; pass "ignore_errors" to skip the check_error call. Any other
#        value, or no value, means the status is handed to check_error.
# Globals:
#   error_code (written) - exit status of the executed command
# Behaviour:
#   logs the command through write_log, runs it, then calls
#   check_error <status> <command> unless "ignore_errors" was requested.

run_cmd() {
  local cmd=$1
  local fail_on_error=${2:-break_code}

  write_log "Executing Command --> ${cmd}"

  # Deliberately unquoted: the command arrives as one string and relies on word
  # splitting to become a command name plus arguments. Quoting inside the
  # string is NOT honoured.
  # shellcheck disable=SC2086
  $cmd
  error_code=$?

  if [ "${fail_on_error}" != "ignore_errors" ]; then
    check_error "${error_code}" "${cmd}"
  fi
}

#--------------------------------Begin execution of Steps------------------------------------------------#

Expand All @@ -34,9 +106,29 @@ else
fi


# Run the dependency installer script and stop the build if it fails.
build/install_dependencies
check_error $? "build/install_dependencies"
write_log "Installing dependencies [sh -x build/install_dependencies]"

# None of the jars below are available in Maven Central or any other public repository,
# which is why they are installed manually from ../lib while building gimel.
# stdout and stderr of each mvn call are discarded; only the exit status is checked.

mvn install:install-file -DgroupId=qubole-hive-JDBC -DartifactId=qubole-hive-JDBC -Dversion=0.0.7 -Dpackaging=jar -Dfile="${this_dir}/../lib/qubole-hive-JDBC.jar" 1>>/dev/null 2>&1
check_error $? "install qubole-hive-JDBC"

mvn install:install-file -DgroupId=com.hortonworks -DartifactId=shc-core -Dversion=1.1.2-2.3-s_2.11 -Dpackaging=jar -Dfile="${this_dir}/../lib/shc-core.jar" 1>>/dev/null 2>&1
check_error $? "install shc-core"

mvn install:install-file -DgroupId=com.osscube -DartifactId=aerospike-spark -Dversion=0.3-SNAPSHOT -Dpackaging=jar -Dfile="${this_dir}/../lib/aerospike-spark.jar" 1>>/dev/null 2>&1
check_error $? "install aerospike-spark"

mvn install:install-file -DgroupId=com.twitter -DartifactId=zookeeper-client_2.10 -Dversion=2.0.0_fs-b -Dpackaging=jar -Dfile="${this_dir}/../lib/zookeeper-client_2.10-2.0.0_fs-b.jar" 1>>/dev/null 2>&1
check_error $? "install zookeeper-client"

mvn install:install-file -DgroupId=com.teradata.jdbc -DartifactId=terajdbc4 -Dversion=15.10.00.22 -Dpackaging=jar -Dfile="${this_dir}/../lib/terajdbc4-15.10.00.22.jar" 1>>/dev/null 2>&1
check_error $? "install terajdbc4"

mvn install:install-file -DgroupId=com.teradata.jdbc -DartifactId=tdgssconfig -Dversion=15.10.00.22 -Dpackaging=jar -Dfile="${this_dir}/../lib/tdgssconfig-15.10.00.22.jar" 1>>/dev/null 2>&1
check_error $? "install tdgssconfig"

write_log "Building the project [mvn install ${user_args}]"
mvn install "$@"
# "$*" joins all script arguments into the single description string that
# check_error expects; "$@" inside a string would split it into several
# arguments and only the first build argument would be logged.
check_error $? "mvn install $*"

Expand Down
73 changes: 71 additions & 2 deletions build/install_dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,77 @@

# Locate this script. BASH_SOURCE holds the path bash was given for it: an
# absolute path is used as-is, a relative one is anchored at the current
# directory.
case "${BASH_SOURCE[0]}" in
  /*) this_script=${BASH_SOURCE[0]} ;;
  *) this_script="$(pwd)/${BASH_SOURCE[0]}" ;;
esac
this_dir=$(dirname "${this_script}")
# Source reusable functions
source "${this_dir}/gimel_functions"

#----------------------------function will check for error code & exit if failure, else proceed further----------------------------#

# Usage  : check_error <exit_status> <stage_description>
# Example: check_error $? "mvn install"
#
# Arguments:
#   $1 - exit status of the command that just ran (normally $?)
#   $2 - text identifying the stage; it is included in the logged line
# Behaviour:
#   status > 0            : logs "Error | Stage | <text>" through write_log and
#                           exits the script with that status
#   status = 0            : logs "Success | Stage | <text>" through write_log
#   missing / non-numeric : logged as an error and the script exits with 1, so
#                           a caller that forgot to pass $? is never reported
#                           as a success

check_error() {
  local cmd_error_code=${1:-}
  local custom_message=${2:-}

  # Reject anything that is not a plain non-negative integer before it reaches
  # the numeric comparison below.
  case "${cmd_error_code}" in
    '' | *[!0-9]*)
      write_log "Error | Stage | ${custom_message} | invalid exit status '${cmd_error_code}'"
      exit 1
      ;;
  esac

  if [ "${cmd_error_code}" -gt 0 ]; then
    write_log "Error | Stage | ${custom_message}"
    exit "${cmd_error_code}"
  else
    write_log "Success | Stage | ${custom_message}"
  fi
}

#----------------------------function will check for error code & warn if failure----------------------------#

# Usage  : check_warning <exit_status> <program_exit_code> <message>
# Example: check_warning $? 10 "optional step failed"
#
# Arguments:
#   $1 - exit status of the command that just ran (normally $?)
#   $2 - code to show in the warning line
#   $3 - message to show in the warning line
# Behaviour:
#   status > 0 : logs "WARNING ! <status> <code> <message>" through write_log
#   otherwise  : prints an empty line
# The script is never terminated by this function.

check_warning() {
  local cmd_error_code=${1:-}
  local pgm_exit_code=${2:-}
  local pgm_exit_msg=${3:-}

  # A missing or non-numeric status takes the "no warning" branch; the error
  # that `[` would print for such a value is silenced.
  if [ "${cmd_error_code:-0}" -gt 0 ] 2>/dev/null; then
    write_log "WARNING ! ${cmd_error_code} ${pgm_exit_code} ${pgm_exit_msg}"
  else
    echo ""
  fi
}



#----------------------------function will write the message to Console / Log File----------------------------#

# Usage : write_log <message>
#
# Arguments:
#   $1 - the message to log
# Output:
#   one line on stdout of the form "YYYYMMDD HH:MM:SS | <message>"
# The message is printed verbatim: runs of whitespace are preserved and glob
# characters such as '*' are not expanded.

write_log() {
  local msg=${1:-}
  local to_be_logged
  to_be_logged="$(date '+%Y%m%d %H:%M:%S') | ${msg}"
  printf '%s\n' "${to_be_logged}"
}

#-----------------------------------Executes a Command--------------------------------------------------------#

# Usage  : run_cmd <command> [ignore_errors]
# Example: run_cmd "mvn -v"
#          run_cmd "rm -f some.file" ignore_errors
#
# Arguments:
#   $1 - the command line to execute, given as a single string
#   $2 - optional; pass "ignore_errors" to skip the check_error call. Any other
#        value, or no value, means the status is handed to check_error.
# Globals:
#   error_code (written) - exit status of the executed command
# Behaviour:
#   logs the command through write_log, runs it, then calls
#   check_error <status> <command> unless "ignore_errors" was requested.

run_cmd() {
  local cmd=$1
  local fail_on_error=${2:-break_code}

  write_log "Executing Command --> ${cmd}"

  # Deliberately unquoted: the command arrives as one string and relies on word
  # splitting to become a command name plus arguments. Quoting inside the
  # string is NOT honoured.
  # shellcheck disable=SC2086
  $cmd
  error_code=$?

  if [ "${fail_on_error}" != "ignore_errors" ]; then
    check_error "${error_code}" "${cmd}"
  fi
}

#--------------------------------Begin execution of Steps------------------------------------------------#

Expand Down
6 changes: 3 additions & 3 deletions docs/getting-started/gimel-modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<dependency>
<groupId>com.paypal.gimel</groupId>
<artifactId>gimel-tools</artifactId> <!--Refer to one of the 3 versions listed below, depending on the required Spark version -->
<version>2.0.0-SNAPSHOT</version> <!--provides spark 2.2.0 compiled code-->
<version>2.4.7-SNAPSHOT</version> <!--provides spark 2.4.7 compiled code-->
<scope>provided</scope> <!--Ensure scope is provided as the gimel libraries can be added at runtime-->
</dependency>
```
Expand All @@ -23,7 +23,7 @@
<dependency>
<groupId>com.paypal.gimel</groupId>
<artifactId>gimel-sql</artifactId> <!--Refer to one of the 3 versions listed below, depending on the required Spark version -->
<version>2.0.0-SNAPSHOT</version> <!--provides spark 2.2.0 compiled code-->
<version>2.4.7-SNAPSHOT</version> <!--provides spark 2.4.7 compiled code-->
<scope>provided</scope> <!--Ensure scope is provided as the gimel libraries can be added at runtime-->
</dependency>
```
Expand All @@ -32,7 +32,7 @@
<dependency>
<groupId>com.paypal.gimel</groupId>
<artifactId>gimel-core</artifactId> <!--Refer to one of the 3 versions listed below, depending on the required Spark version -->
<version>2.0.0-SNAPSHOT</version> <!--provides spark 2.2.0 compiled code-->
<version>2.4.7-SNAPSHOT</version> <!--provides spark 2.4.7 compiled code-->
<scope>provided</scope> <!--Ensure scope is provided as the gimel libraries can be added at runtime-->
</dependency>
```
Expand Down
4 changes: 2 additions & 2 deletions docs/gimel-connectors/kafka2.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@

```bash
spark-shell --jars
gimel-tools-2.0.0-SNAPSHOT-uber.jar,
gimel-tools-2.4.7-SNAPSHOT-uber.jar,
generic-deserializers-1.0-SNAPSHOT-uber.jar,
generic-serializers-1.0-SNAPSHOT-uber.jar

Expand Down Expand Up @@ -510,7 +510,7 @@ Users can implement their own logic for getting the kafka properties which will
<dependency>
<groupId>com.paypal.gimel</groupId>
<artifactId>gimel-common</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.4.7-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
```
Expand Down
2 changes: 1 addition & 1 deletion docs/gimel-logging/gimel-logging.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ object SparkWordCount {
Example:

```shell script
spark-shell --jars gimel-tools-2.0.0-SNAPSHOT-uber.jar \
spark-shell --jars gimel-tools-2.4.7-SNAPSHOT-uber.jar \
--conf spark.driver.extraJavaOptions="-Dgimel.logger.properties.filepath=gimelLoggerConfig.properties" \
--conf spark.executor.extraJavaOptions="-Dgimel.logger.properties.filepath=gimelLoggerConfig.properties" \
--conf spark.files=/path/to/gimelLoggerConfig.properties \
Expand Down
2 changes: 1 addition & 1 deletion docs/gimel-serde/gimel-serde.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Example:
spark-shell --jars \
generic-deserializers-1.0-SNAPSHOT-uber.jar,\
generic-serializers-1.0-SNAPSHOT-uber.jar,\
gimel-tools-2.0.0-SNAPSHOT-uber.jar
gimel-tools-2.4.7-SNAPSHOT-uber.jar
```

## Generic Deserializers
Expand Down
2 changes: 1 addition & 1 deletion docs/try-gimel/0-prerequisite.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ quickstart/start-gimel kafka,elasticsearch,hbase-master,hbase-regionserver
```
docker exec -it spark-master bash -c \
"export USER=an;export SPARK_HOME=/spark/;export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin; \
/spark/bin/spark-shell --jars /root/gimel-sql-2.0.0-SNAPSHOT-uber.jar"
/spark/bin/spark-shell --jars /root/gimel-sql-2.4.7-SNAPSHOT-uber.jar"
```

**Note:** *You can view the Spark UI <a href="http://localhost:4040" target="_blank">here</a>*
Expand Down
35 changes: 13 additions & 22 deletions gimel-dataapi/gimel-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ under the License.
<parent>
<artifactId>gimel-dataapi</artifactId>
<groupId>com.paypal.gimel</groupId>
<version>2.0.0-SNAPSHOT</version>
<version>2.4.7-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>gimel-common</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.4.7-SNAPSHOT</version>

<dependencies>
<dependency>
Expand All @@ -55,9 +55,9 @@ under the License.
<scope>${packaging.scope}</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-xml</artifactId>
<version>2.11.0-M4</version>
<groupId>org.scala-lang.modules</groupId>
<artifactId>scala-xml_${scala.binary.version}</artifactId>
<version>${scala.xml.version}</version>
<scope>${scala.packaging.scope}</scope>
</dependency>
<dependency>
Expand All @@ -82,12 +82,6 @@ under the License.
<groupId>com.paypal.gimel</groupId>
<artifactId>gimel-logger</artifactId>
<version>${gimel.version}-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
Expand All @@ -97,7 +91,7 @@ under the License.
<dependency>
<groupId>com.paypal.gimel</groupId>
<artifactId>serde-common</artifactId>
<version>1.0-SNAPSHOT</version>
<version>2.4.7-SNAPSHOT</version>
<scope>${packaging.scope}</scope>
</dependency>
<dependency>
Expand All @@ -106,12 +100,6 @@ under the License.
<version>${kafka.version}</version>
<scope>${packaging.scope}</scope>
</dependency>
<dependency>
<groupId>com.databricks</groupId>
<artifactId>spark-avro_${scala.binary.version}</artifactId>
<version>4.0.0</version>
<scope>${packaging.scope}</scope>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-framework</artifactId>
Expand Down Expand Up @@ -162,7 +150,7 @@ under the License.
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-${kafka.binary.version}_${scala.binary.version}</artifactId>
<artifactId>spark-streaming-kafka-${spark.kafka.connector.version}</artifactId>
<version>${spark.version}</version>
<scope>${spark.packaging.scope}</scope>
</dependency>
Expand Down Expand Up @@ -197,7 +185,7 @@ under the License.
<version>${confluent.version}</version>
<scope>test</scope>
</dependency>
<!-- Kafka local testing utility needs Netty 3.x at test scope for the minicluster -->
<!-- Kafka local testing utility needs Netty 3.x at test scope for the minicluster -->
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
Expand All @@ -211,7 +199,7 @@ under the License.
<scope>test</scope>
</dependency>
<dependency>
<groupId> net.jpountz.lz4</groupId>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
<version>1.3.0</version>
<scope>test</scope>
Expand All @@ -220,16 +208,19 @@ under the License.
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>${jackson.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
<scope>compile</scope>
</dependency>
</dependencies>

Expand Down Expand Up @@ -277,7 +268,7 @@ under the License.
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.1</version>
<version>${maven.shade.plugin.version}</version>
<configuration>
<relocations>
<relocation>
Expand Down
Loading