Skip to content

Commit

Permalink
Merge pull request #670 from nitishgupta/master
Browse files Browse the repository at this point in the history
BasicTextAnnotationBuilder function for pre-tokenized text with cogcomp-nlpy
  • Loading branch information
Daniel Khashabi authored Jul 23, 2018
2 parents 7066e7e + 15ff023 commit cd8ac71
Show file tree
Hide file tree
Showing 33 changed files with 154 additions and 148 deletions.
4 changes: 2 additions & 2 deletions big-data-utils/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand All @@ -23,7 +23,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>org.xeustechnologies.google-api</groupId>
Expand Down
8 changes: 4 additions & 4 deletions chunker/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand All @@ -13,7 +13,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>

<dependency>
Expand All @@ -24,12 +24,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>LBJava-NLP-tools</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-pos</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
Expand Down
20 changes: 10 additions & 10 deletions commasrl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down Expand Up @@ -35,48 +35,48 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-curator</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-tokenizer</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-corpusreaders</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-inference</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>stanford_3.3.1</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-pos</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-ner</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-chunker</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
Expand Down
2 changes: 1 addition & 1 deletion core-utilities/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<artifactId>illinois-core-utilities</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,26 @@ public static TextAnnotation createTextAnnotationFromTokens(List<String[]> token


/**
* The default way to create a {@link TextAnnotation} from pre-tokenized text.
* A way to create a {@link TextAnnotation} from pre-tokenized text from Python
*
* @param tokenizedSentences A list of sentences, each one being an array of tokens
* @param tokenizedSentences A list of sentences, each one being a list of tokens
* @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
*/
public static TextAnnotation createTextAnnotationFromListofListofTokens(List<List<String>> tokenizedSentences) {
// Not named createTextAnnotationFromTokens: a List<List<...>> overload would have the same erasure as the List<String[]> version and fail to compile
public static TextAnnotation createTextAnnotationFromListofListofTokens(List<List<Object>> tokenizedSentences) {
// This function takes List<List<Object>> to be able to run with cogcomp-nlpy (using pyjnius)
// Convert the inner lists to String arrays
// Call the default TextAnnotation builder function

List<String[]> tokenizedSentences_formatted = new ArrayList<String[]>();

// Converting inner list to array
for (List<String> sentence : tokenizedSentences) {
String[] sentence_array = (String[]) sentence.toArray();
for (List<Object> sentence : tokenizedSentences) {
String[] sentence_array = new String[sentence.size()];
int token_idx = 0;
for (Object w : sentence) {
sentence_array[token_idx] = (String) w;
token_idx += 1;
}
tokenizedSentences_formatted.add(sentence_array);
}

Expand Down
6 changes: 3 additions & 3 deletions corpusreaders/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<artifactId>illinois-corpusreaders</artifactId>
Expand All @@ -15,12 +15,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-tokenizer</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
Expand Down
4 changes: 2 additions & 2 deletions curator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<artifactId>illinois-curator</artifactId>
Expand All @@ -16,7 +16,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>

<!-- Curator-related dependencies -->
Expand Down
6 changes: 3 additions & 3 deletions dataless-classifier/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand All @@ -21,12 +21,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-tokenizer</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
Expand Down
12 changes: 6 additions & 6 deletions depparse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<artifactId>illinois-depparse</artifactId>
Expand All @@ -16,27 +16,27 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-edison</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-lemmatizer</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-pos</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-chunker</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>

<dependency>
Expand Down
8 changes: 4 additions & 4 deletions edison/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
</parent>

<artifactId>illinois-edison</artifactId>
Expand All @@ -16,7 +16,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<!--<dependency>-->
<!--<groupId>edu.illinois.cs.cogcomp.resources</groupId>-->
Expand Down Expand Up @@ -80,13 +80,13 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-corpusreaders</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<!-- Used only in utilities.CreateTestTAResource -->
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-curator</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
<scope>test</scope>
</dependency>
<dependency>
Expand Down
4 changes: 2 additions & 2 deletions external/clausie/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
<relativePath>../../pom.xml</relativePath>
</parent>

Expand All @@ -24,7 +24,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>external-commons</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
Expand Down
6 changes: 3 additions & 3 deletions external/external-commons/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
<relativePath>../../pom.xml</relativePath>
</parent>

Expand All @@ -16,12 +16,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-tokenizer</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>org.cogcomp</groupId>
Expand Down
6 changes: 3 additions & 3 deletions external/path-lstm/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
<relativePath>../../pom.xml</relativePath>
</parent>

Expand All @@ -16,12 +16,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>external-commons</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-edison</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<dependency>
<groupId>org.cogcomp</groupId>
Expand Down
6 changes: 3 additions & 3 deletions external/stanford_3.3.1/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>illinois-cogcomp-nlp</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>4.0.8</version>
<version>4.0.9</version>
<relativePath>../../pom.xml</relativePath>
</parent>

Expand All @@ -19,7 +19,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
<!-- Stanford Core NLP, used for its parser (for SRL) -->
<dependency>
Expand All @@ -36,7 +36,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-corpusreaders</artifactId>
<version>4.0.8</version>
<version>4.0.9</version>
</dependency>
</dependencies>
</project>
Loading

0 comments on commit cd8ac71

Please sign in to comment.