Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/sync fields with snowplow r73 080 #26

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.snowplowanalytics</groupId>
<artifactId>snowplow-scala-analytics-sdk</artifactId>
<version>0.1.1</version>

<!-- Library dependencies. scalaz-core and json4s-jackson use the default (compile) scope;
     specs2-core and scalaz-specs2 are test-scoped. Every artifact id carries the _2.11
     Scala binary suffix, which lines up with the scalaCompatVersion (2.11) configured
     for scala-maven-plugin in the build section. -->
<dependencies>
<dependency>
<groupId>org.scalaz</groupId>
<artifactId>scalaz-core_2.11</artifactId>
<version>7.0.6</version>
</dependency>
<dependency>
<groupId>org.json4s</groupId>
<artifactId>json4s-jackson_2.11</artifactId>
<version>3.2.10</version>
</dependency>
<!-- Test-only dependencies. -->
<dependency>
<groupId>org.specs2</groupId>
<artifactId>specs2-core_2.11</artifactId>
<version>2.3.13</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.typelevel</groupId>
<artifactId>scalaz-specs2_2.11</artifactId>
<version>0.2</version>
<scope>test</scope>
</dependency>
</dependencies>


<build>
<plugins>
<!-- Scala compilation. Main sources are compiled in the process-resources phase and test
     sources in the process-test-resources phase, i.e. earlier than Maven's default
     compile and test-compile phases. -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<id>scala-compile</id>
<phase>process-resources</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>

<configuration>
<!-- Scala 2.11.8; the 2.11 binary version must match the _2.11 suffix of the dependencies. -->
<scalaCompatVersion>2.11</scalaCompatVersion>
<scalaVersion>2.11.8</scalaVersion>
<!-- Incremental recompilation in a forked JVM, using a Zinc server when available. -->
<recompileMode>incremental</recompileMode>
<fork>true</fork>
<useZincServer>true</useZincServer>
<!-- The forked compiler JVM starts with, and is capped at, a 2 GB heap. -->
<jvmArgs>
<jvmArg>-Xmx2g</jvmArg>
<jvmArg>-Xms2g</jvmArg>
</jvmArgs>

<!-- Explicit source roots for main and test Scala code. -->
<sourceDir>${project.basedir}/src/main/scala</sourceDir>
<testSourceDir>${project.basedir}/src/test/scala</testSourceDir>
</configuration>
</plugin>

<!-- Test execution. skipTests=true turns off running tests for every build of this POM;
     test sources are still compiled by the scala-test-compile execution of
     scala-maven-plugin.
     NOTE(review): with this setting the test-scoped specs2 dependencies are never
     exercised by a Maven build. Confirm this is intentional; if tests should only be
     skipped on demand, passing -DskipTests on the command line achieves that. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>

<!-- Resource handling. Declares a single resource, the file named Manifestfile in the
     project root, with filtering enabled so that property placeholders in it are
     substituted.
     NOTE(review): resources are normally declared under build/resources rather than in
     the plugin configuration; verify that this plugin-level declaration is actually
     honored by the resources goal. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.1</version>
<configuration>
<resources>
<resource>
<directory>${project.basedir}</directory>
<includes>
<include>Manifestfile</include>
</includes>
<filtering>true</filtering>
</resource>
</resources>
</configuration>
</plugin>
<!-- Jar packaging. addDefaultImplementationEntries writes the Implementation-* entries
     derived from the POM into the jar manifest.
     NOTE(review): unlike the other plugins in this build, no version is pinned here, so
     the plugin version depends on the Maven installation; consider pinning one. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
</manifest>
</archive>
</configuration>
</plugin>

<!-- Fat jar. During the package phase the shade goal builds an additional artifact that
     is attached next to the regular jar under the classifier "all". -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
<configuration>
<!-- Keep the plain jar as the main artifact; the shaded jar is attached separately. -->
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedClassifierName>all</shadedClassifierName>
<filters>
<filter>
<!-- For every bundled artifact, leave out jar signature files: signatures of the
     original jars do not apply to the merged jar. -->
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</plugin>

</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,11 @@ object EventTransformer {
"mkt_term" -> StringField,
"mkt_content" -> StringField,
"mkt_campaign" -> StringField,
"contexts" -> ContextsField,
"se_category" -> StringField,
"se_action" -> StringField,
"se_label" -> StringField,
"se_property" -> StringField,
"se_value" -> StringField,
"unstruct_event" -> UnstructField,
"tr_orderid" -> StringField,
"tr_affiliation" -> StringField,
"tr_total" -> DoubleField,
Expand Down Expand Up @@ -170,8 +168,7 @@ object EventTransformer {
"etl_tags" -> StringField,
"dvce_sent_tstamp" -> TstampField,
"refr_domain_userid" -> StringField,
"refr_device_tstamp" -> TstampField,
"derived_contexts" -> ContextsField,
"refr_dvce_tstamp" -> TstampField,
"domain_sessionid" -> StringField,
"derived_tstamp" -> TstampField,
"event_vendor" -> StringField,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package com.snowplowanalytics.snowplow.analytics.scalasdk.json

/**
 * Ad-hoc consistency check between two hand-maintained lists of event field names.
 *
 * One list holds the field names seen in the data, the other is a pasted snapshot of a
 * `"name" -> XField` mapping table. Both are embedded in this file as string literals;
 * nothing is read from `EventTransformer` itself, so the snapshot has to be refreshed by
 * hand whenever the real mapping changes.
 *
 * Running `main` prints the size of each set followed by the names that occur in only
 * one of them.
 */
object Test {

  /** Field names present in the data, separated by whitespace. */
  private val fieldsRealStr: String =
    """app_id platform etl_tstamp collector_tstamp dvce_created_tstamp event event_id txn_id
      |name_tracker v_tracker v_collector v_etl user_id user_ipaddress user_fingerprint
      |domain_userid domain_sessionidx network_userid
      |geo_country geo_region geo_city geo_zipcode geo_latitude geo_longitude geo_region_name
      |ip_isp ip_organization ip_domain ip_netspeed
      |page_url page_title page_referrer page_urlscheme page_urlhost page_urlport page_urlpath page_urlquery page_urlfragment
      |refr_urlscheme refr_urlhost refr_urlport refr_urlpath refr_urlquery refr_urlfragment refr_medium refr_source refr_term
      |mkt_medium mkt_source mkt_term mkt_content mkt_campaign
      |se_category se_action se_label se_property se_value
      |tr_orderid tr_affiliation tr_total tr_tax tr_shipping tr_city tr_state tr_country
      |ti_orderid ti_sku ti_name ti_category ti_price ti_quantity
      |pp_xoffset_min pp_xoffset_max pp_yoffset_min pp_yoffset_max
      |useragent br_name br_family br_version br_type br_renderengine br_lang
      |br_features_pdf br_features_flash br_features_java br_features_director br_features_quicktime
      |br_features_realplayer br_features_windowsmedia br_features_gears br_features_silverlight
      |br_cookies br_colordepth br_viewwidth br_viewheight
      |os_name os_family os_manufacturer os_timezone
      |dvce_type dvce_ismobile dvce_screenwidth dvce_screenheight
      |doc_charset doc_width doc_height
      |tr_currency tr_total_base tr_tax_base tr_shipping_base ti_currency ti_price_base base_currency
      |geo_timezone mkt_clickid mkt_network etl_tags dvce_sent_tstamp
      |refr_domain_userid refr_dvce_tstamp domain_sessionid derived_tstamp
      |event_vendor event_name event_format event_version event_fingerprint true_tstamp
      |""".stripMargin

  /**
   * Snapshot of the transformer's mapping table, one `"name" -> XField` entry per line,
   * entries separated by commas. Only the quoted name of each entry is used.
   */
  private val fieldsTransformerStr: String =
    """
      |"app_id" -> StringField,
      |"platform" -> StringField,
      |"etl_tstamp" -> TstampField,
      |"collector_tstamp" -> TstampField,
      |"dvce_created_tstamp" -> TstampField,
      |"event" -> StringField,
      |"event_id" -> StringField,
      |"txn_id" -> IntField,
      |"name_tracker" -> StringField,
      |"v_tracker" -> StringField,
      |"v_collector" -> StringField,
      |"v_etl" -> StringField,
      |"user_id" -> StringField,
      |"user_ipaddress" -> StringField,
      |"user_fingerprint" -> StringField,
      |"domain_userid" -> StringField,
      |"domain_sessionidx" -> IntField,
      |"network_userid" -> StringField,
      |"geo_country" -> StringField,
      |"geo_region" -> StringField,
      |"geo_city" -> StringField,
      |"geo_zipcode" -> StringField,
      |"geo_latitude" -> DoubleField,
      |"geo_longitude" -> DoubleField,
      |"geo_region_name" -> StringField,
      |"ip_isp" -> StringField,
      |"ip_organization" -> StringField,
      |"ip_domain" -> StringField,
      |"ip_netspeed" -> StringField,
      |"page_url" -> StringField,
      |"page_title" -> StringField,
      |"page_referrer" -> StringField,
      |"page_urlscheme" -> StringField,
      |"page_urlhost" -> StringField,
      |"page_urlport" -> IntField,
      |"page_urlpath" -> StringField,
      |"page_urlquery" -> StringField,
      |"page_urlfragment" -> StringField,
      |"refr_urlscheme" -> StringField,
      |"refr_urlhost" -> StringField,
      |"refr_urlport" -> IntField,
      |"refr_urlpath" -> StringField,
      |"refr_urlquery" -> StringField,
      |"refr_urlfragment" -> StringField,
      |"refr_medium" -> StringField,
      |"refr_source" -> StringField,
      |"refr_term" -> StringField,
      |"mkt_medium" -> StringField,
      |"mkt_source" -> StringField,
      |"mkt_term" -> StringField,
      |"mkt_content" -> StringField,
      |"mkt_campaign" -> StringField,
      |"contexts" -> ContextsField,
      |"se_category" -> StringField,
      |"se_action" -> StringField,
      |"se_label" -> StringField,
      |"se_property" -> StringField,
      |"se_value" -> StringField,
      |"unstruct_event" -> UnstructField,
      |"tr_orderid" -> StringField,
      |"tr_affiliation" -> StringField,
      |"tr_total" -> DoubleField,
      |"tr_tax" -> DoubleField,
      |"tr_shipping" -> DoubleField,
      |"tr_city" -> StringField,
      |"tr_state" -> StringField,
      |"tr_country" -> StringField,
      |"ti_orderid" -> StringField,
      |"ti_sku" -> StringField,
      |"ti_name" -> StringField,
      |"ti_category" -> StringField,
      |"ti_price" -> DoubleField,
      |"ti_quantity" -> IntField,
      |"pp_xoffset_min" -> IntField,
      |"pp_xoffset_max" -> IntField,
      |"pp_yoffset_min" -> IntField,
      |"pp_yoffset_max" -> IntField,
      |"useragent" -> StringField,
      |"br_name" -> StringField,
      |"br_family" -> StringField,
      |"br_version" -> StringField,
      |"br_type" -> StringField,
      |"br_renderengine" -> StringField,
      |"br_lang" -> StringField,
      |"br_features_pdf" -> BoolField,
      |"br_features_flash" -> BoolField,
      |"br_features_java" -> BoolField,
      |"br_features_director" -> BoolField,
      |"br_features_quicktime" -> BoolField,
      |"br_features_realplayer" -> BoolField,
      |"br_features_windowsmedia" -> BoolField,
      |"br_features_gears" -> BoolField,
      |"br_features_silverlight" -> BoolField,
      |"br_cookies" -> BoolField,
      |"br_colordepth" -> StringField,
      |"br_viewwidth" -> IntField,
      |"br_viewheight" -> IntField,
      |"os_name" -> StringField,
      |"os_family" -> StringField,
      |"os_manufacturer" -> StringField,
      |"os_timezone" -> StringField,
      |"dvce_type" -> StringField,
      |"dvce_ismobile" -> BoolField,
      |"dvce_screenwidth" -> IntField,
      |"dvce_screenheight" -> IntField,
      |"doc_charset" -> StringField,
      |"doc_width" -> IntField,
      |"doc_height" -> IntField,
      |"tr_currency" -> StringField,
      |"tr_total_base" -> DoubleField,
      |"tr_tax_base" -> DoubleField,
      |"tr_shipping_base" -> DoubleField,
      |"ti_currency" -> StringField,
      |"ti_price_base" -> DoubleField,
      |"base_currency" -> StringField,
      |"geo_timezone" -> StringField,
      |"mkt_clickid" -> StringField,
      |"mkt_network" -> StringField,
      |"etl_tags" -> StringField,
      |"dvce_sent_tstamp" -> TstampField,
      |"refr_domain_userid" -> StringField,
      |"refr_device_tstamp" -> TstampField,
      |"derived_contexts" -> ContextsField,
      |"domain_sessionid" -> StringField,
      |"derived_tstamp" -> TstampField,
      |"event_vendor" -> StringField,
      |"event_name" -> StringField,
      |"event_format" -> StringField,
      |"event_version" -> StringField,
      |"event_fingerprint" -> StringField,
      |"true_tstamp" -> TstampField
      |""".stripMargin

  /**
   * Splits a whitespace-separated list of names into a set.
   *
   * @param names field names separated by any run of whitespace
   * @return the distinct, non-empty names
   */
  private def realFieldNames(names: String): Set[String] =
    names.split("\\s+").iterator.filter(_.nonEmpty).toSet

  /**
   * Extracts the field names from a pasted `"name" -> XField, ...` mapping table.
   *
   * @param mapping comma-separated entries of the form `"name" -> XField`
   * @return the distinct, non-empty names with quotes and whitespace removed
   */
  private def transformerFieldNames(mapping: String): Set[String] =
    mapping.split(",").iterator
      // Keep only the part before '->'; headOption guards against an entry that is just "->".
      .map(entry => entry.split("->").headOption.getOrElse(""))
      // Strip the surrounding quotes plus any spaces and line breaks.
      .map(_.replaceAll("[\" \r\n]", ""))
      // A trailing comma or blank entry must not count as a field.
      .filter(_.nonEmpty)
      .toSet

  /**
   * Prints both set sizes and the two one-sided differences to standard output.
   *
   * @param args ignored
   */
  def main(args: Array[String]): Unit = {
    val fieldsReal = realFieldNames(fieldsRealStr)
    val fieldsTransformer = transformerFieldNames(fieldsTransformerStr)

    println(fieldsReal.size)
    println(fieldsTransformer.size)

    println("In transformer but not in data: " + fieldsTransformer.diff(fieldsReal))
    println("In data but not in transformer: " + fieldsReal.diff(fieldsTransformer))
  }
}