Closes #614: Add event table (#615)
* Refs #614: adds cle keyspace and events table

* Refs #614: fixes index out of bounds error by using slice instead of substring
janehmueller authored Nov 3, 2017
1 parent bbf2fcf commit 6700557
Showing 2 changed files with 18 additions and 4 deletions.
19 changes: 17 additions & 2 deletions cassandra.cql
@@ -489,7 +489,6 @@ CREATE TABLE datalake.versiondiff (




-- Indices
CREATE CUSTOM INDEX ON datalake.subject (name) USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = {
@@ -578,7 +577,7 @@ CREATE TABLE evaluation.featureentries (
staging frozen<subject>,
score map<text, frozen<list<double>>>,
correct boolean
); -- dropable
);

CREATE TABLE evaluation.goldstandard (
id1 uuid PRIMARY KEY,
@@ -671,3 +670,19 @@ CREATE TABLE evaluation.version (
program text,
timestamp timestamp
);



-- CLE
CREATE KEYSPACE cle WITH REPLICATION = {
'class' : 'SimpleStrategy',
'replication_factor' : 3
};

CREATE TABLE cle.events (
type text,
user text,
timestamp timestamp,
props map<text, text>,
PRIMARY KEY ((type, user), timestamp)
) WITH CLUSTERING ORDER BY (timestamp DESC);
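
The new events table partitions on (type, user) and clusters by timestamp in descending order, so each (type, user) pair owns one partition whose newest events sit at its head. A minimal usage sketch against this schema (the literal values are illustrative, not taken from the commit):

INSERT INTO cle.events (type, user, timestamp, props)
VALUES ('click', 'jane', toTimestamp(now()), {'page': 'search'});

-- Fetch the ten most recent 'click' events for this user. The full
-- partition key is required in the WHERE clause, and the DESC
-- clustering order makes "latest first" a sequential read from the
-- head of the partition.
SELECT * FROM cle.events
WHERE type = 'click' AND user = 'jane'
LIMIT 10;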
3 changes: 1 addition & 2 deletions RelationSentenceParser.scala
@@ -21,7 +21,6 @@ import de.hpi.ingestion.dataimport.dbpedia.models.Relation
import de.hpi.ingestion.dataimport.wikidata.models.WikidataEntity
import de.hpi.ingestion.framework.SparkJob
import de.hpi.ingestion.textmining.models.{EntityLink, ParsedWikipediaEntry, Sentence}
import de.hpi.ingestion.textmining.tokenizer.IngestionTokenizer
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import de.hpi.ingestion.textmining.preprocessing.CompanyLinkFilter.extractCompanyPages
@@ -107,7 +106,7 @@ object RelationSentenceParser {
.sortBy(_.offset)
.foreach { entity =>
entity.offset.foreach { entityOffset =>
bagOfWords ++= tokenizer.process(sentence.substring(sentenceOffset, entityOffset))
bagOfWords ++= tokenizer.process(sentence.slice(sentenceOffset, entityOffset))
sentenceOffset = entityOffset + entity.alias.length
}
}
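
The substring-to-slice change works because the two methods handle out-of-range indices differently: java.lang.String.substring throws StringIndexOutOfBoundsException when an index lies outside the string or the begin index exceeds the end index, while Scala's StringOps.slice clamps both indices to the string's bounds and returns an empty string when from >= until. Presumably a stored entity offset could point past the end of the sentence text, triggering exactly that exception. A minimal sketch of the difference (the example string is illustrative, not from the repository):

object SliceVsSubstring extends App {
  val sentence = "Acme buys Foo"   // length 13

  // substring throws on indices outside the string:
  // sentence.substring(10, 20)    // StringIndexOutOfBoundsException
  // sentence.substring(12, 4)     // StringIndexOutOfBoundsException

  // slice clamps the indices and degrades to "" instead of throwing:
  println(sentence.slice(10, 20))  // prints "Foo"
  println(sentence.slice(12, 4))   // prints "" (from >= until)
}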
