diff --git a/README.md b/README.md
index a438fb0fe..0a8dbd0da 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,7 @@ Each library contains detailed readme and instructions on how to use it. In addi
| [commasrl](commasrl/README.md) | This software extracts relations that commas participate in. |
| [similarity](similarity/README.md) | This software compare objects --especially Strings-- and return a score indicating how similar they are. |
| [temporal-normalizer](temporal-normalizer/README.md) | A temporal extractor and normalizer. |
+| [dataless-classifier](dataless-classifier/README.md) | Classifies text into a user-specified label hierarchy from just the textual label descriptions |
| [external-annotators](external/README.md) | A collection useful external annotators. |
diff --git a/core-utilities/src/main/java/edu/illinois/cs/cogcomp/core/datastructures/ViewNames.java b/core-utilities/src/main/java/edu/illinois/cs/cogcomp/core/datastructures/ViewNames.java
index 102e16796..fd788635a 100644
--- a/core-utilities/src/main/java/edu/illinois/cs/cogcomp/core/datastructures/ViewNames.java
+++ b/core-utilities/src/main/java/edu/illinois/cs/cogcomp/core/datastructures/ViewNames.java
@@ -88,6 +88,9 @@ public class ViewNames {
public static final String WIKIFIER = "WIKIFIER";
+ public static final String DATALESS_ESA = "DATALESS_ESA";
+ public static final String DATALESS_W2V = "DATALESS_W2V";
+
/**
* @deprecated Replaced by {@link #CLAUSES_CHARNIAK}, {@link #CLAUSES_BERKELEY},
* {@link #CLAUSES_STANFORD}
@@ -150,6 +153,8 @@ public static ViewTypes getViewType(String viewName) {
case SHALLOW_PARSE:
case QUANTITIES:
case WIKIFIER:
+ case DATALESS_ESA:
+ case DATALESS_W2V:
case CLAUSES_CHARNIAK:
case CLAUSES_STANFORD:
case CLAUSES_BERKELEY:
diff --git a/dataless-classifier/README.md b/dataless-classifier/README.md
new file mode 100644
index 000000000..6b5478172
--- /dev/null
+++ b/dataless-classifier/README.md
@@ -0,0 +1,45 @@
+# CogComp-DatalessClassifier
+Given a label ontology, and textual descriptions of those labels, Dataless-Classifier is capable of classifying arbitrary text into that ontology.
+
+It is particularly useful in those scenarios where it is difficult/expensive to gather enough training data to train a supervised text classifier. Dataless-Classifier utilizes the semantic meaning of the labels to bypass the need for explicit supervision. For more information, please visit our main project [page](http://cogcomp.org/page/project_view/6).
+
+
+Some key points:
+- The Main classes for the Dataless Annotators are:
+ * **ESADatalessAnnotator** for the ESA-based Dataless Annotator
+ * **W2VDatalessAnnotator** for the Word2Vec-based Dataless Annotator
+- The ESA-based and Word2Vec-based Dataless Annotators add the **DATALESS_ESA** and **DATALESS_W2V** views, respectively, to the input `TextAnnotation`. Both require the presence of a **TOKENS** view with the end-user's desired tokenization.
+- Since Labels/Topics are inferred at the Document-Level, all topic annotations span the entire document.
+- Sample invocation has been provided in the main functions of each annotator.
+- Both annotators load their embeddings into memory, and thus can easily consume up to **10GB RAM**.
+
+
+## Label Hierarchy
+Dataless Classification requires the end-user to specify a label hierarchy (with label descriptions) into which text is classified. The label hierarchy needs to be provided in a very specific format:
+* **labelNamePath**: Specify your label id to label name mapping here in the `labelID \t labelName` format
+ (the label id can be any ID specific to your system; however, we use the label name itself as the ID in our sample hierarchy for readability)
+* **labelHierarchyPath**: The first line of this file should contain a tab-separated list of the top-level nodes in the hierarchy (i.e. the ones directly connected to the root). Every following line should specify the connections in the hierarchy in the `parentLabelID \t childLabelID1 \t childLabelID2 \t ...` format.
+* **labelDescPath**: Dataless' performance hinges on good label descriptions, which you specify in this file in the `labelID \t labelDescription` format.
+
+We provide a sample 20newsgroups hierarchy with label descriptions inside data/hierarchies/20newsgroups, where:
+* idToLabelNameMap.txt should be used as labelNamePath
+* parentChildIdMap.txt should be used as labelHierarchyPath
+* labelDesc\_Kws\_simple.txt should be used as labelDescPath
+
+We also provide improved 20newsgroups label descriptions in *labelDesc\_Kws\_embellished.txt*, which correspond to the label descriptions used in [2], whereas *labelDesc\_Kws\_simple.txt* corresponds to the label descriptions used in [1].
+
+## Embeddings
+ESA and Word2Vec Embeddings are fetched from the DataStore on demand.
+
+## Config
+A sample config file with the default values is provided in the config folder: *config/project.properties*
+
+To check whether your setup is working, run:
+* `mvn -Dtest=ESADatalessTest#testPredictions test` to test the ESADatalessAnnotator.
+* `mvn -Dtest=W2VDatalessTest#testPredictions test` to test the W2VDatalessAnnotator.
+
+If you use this software for research, please cite the following papers:
+
+[1] Chang, Ming-Wei, et al. "Importance of Semantic Representation: Dataless Classification." AAAI. Vol. 2. 2008.
+
+[2] Song, Yangqiu, and Dan Roth. "On Dataless Hierarchical Text Classification." AAAI. Vol. 7. 2014.
diff --git a/dataless-classifier/config/project.properties b/dataless-classifier/config/project.properties
new file mode 100644
index 000000000..709029944
--- /dev/null
+++ b/dataless-classifier/config/project.properties
@@ -0,0 +1,25 @@
+## Use ResourceManager to read these properties
+# curatorHost = trollope.cs.illinois.edu
+# curatorPort = 9010
+
+## Target Label Hierarchy
+labelHierarchyPath = data/hierarchies/20newsgroups/parentChildIdMap.txt
+labelNamePath = data/hierarchies/20newsgroups/idToLabelNameMap.txt
+labelDescPath = data/hierarchies/20newsgroups/labelDesc_Kws_simple.txt
+# labelDescPath = data/hierarchies/20newsgroups/labelDesc_Kws_embellished.txt
+
+## Classifier configuration
+inferenceBottomUp = True
+classifierThreshold = 0.99
+classifierLeastK = 1
+classifierMaxK = 3
+
+## ESA Configuration
+#esaPath = data/embeddings/esaEmbedding/esa_vectors.txt
+#esaMapPath = data/embeddings/esaEmbedding/idToConceptMap.txt
+#esaDimension = 100
+
+## W2V Configuration
+#w2vPath = data/embeddings/w2vEmbedding-100/w2v_vectors.txt
+#w2vDimension = 200
+
diff --git a/dataless-classifier/data/electronicsTestDocument.txt b/dataless-classifier/data/electronicsTestDocument.txt
new file mode 100644
index 000000000..799d39f7a
--- /dev/null
+++ b/dataless-classifier/data/electronicsTestDocument.txt
@@ -0,0 +1 @@
+yes i know it s nowhere near christmas time but i m gonna loose net access in a few days maybe a week or if i m lucky and wanted to post this for interested people to save till xmas note bell labs is a good place if you have a phd and a good boss i have neither subject xmas light set with levels of brightness another version of a variable brightness xmas light set this set starts with a blinker bulb string diagram orginal way set 0v b b 0rtn modified set for level brightness string 0v 0k w string b 0v rtn note no mods to wiring to the right of this point only one blinker is used note that the blinker would not have as much current thru it as the string bulbs because of the second string of bulbs in parallel with it that s why the use of the 0k w resistor here to add extra current thru the blinker to make up for the current shunted thru the second string while the blinker is glowing and the second string is not glowing when the blinker goes open this resistor has only a slight effect on the brightness of the strings s slightly dimmer s slightly brighter or use a w 0v bulb in place of the 0k resistor if you can get one caution do not replace with a standard c bulb as these draw too much current and burn out the blinker c approx w what you ll see when it s working powerup string will light at full brightness and b will be lit bypassing most of the current from the second string making them not light b will open placing both strings in series making the string that was out to glow at a low brightness and the other string that was on before to glow at reduced brightness be sure to wire and insulate the splices resistor leads and cut wires in a safe manner level brightness xmas light set for easter
diff --git a/dataless-classifier/data/graphicsTestDocument.txt b/dataless-classifier/data/graphicsTestDocument.txt
new file mode 100644
index 000000000..f7e5d430f
--- /dev/null
+++ b/dataless-classifier/data/graphicsTestDocument.txt
@@ -0,0 +1 @@
+i m looking for some recommendations for screen capture programs a couple of issues ago pc mag listed as editor s choices both conversion artist and hijaak for windows anyone have any experience with those or some others i m trying to get an alpha manual in the next few days and i m not making much progress with the screen shots i m currently using dodot and i m about to burn it and the disks it rode it on it s got a lot of freaky bugs and oversights that are driving me crazy tonight it decided that for any graphic it writes out as a tiff file that s under a certain arbitrary size it will swap the left and right sides of the picture usually it confines itself to not copying things to the clipboard so i have to save and load pix for editing in paintbrush or crashing every hour or so the one nice thing it has though is it s dither option you d think that this would turn colors into dots which it does if you go from say colors to colors but if you go from or colors to b w you can set a threshold level for which colors turn to black and which turn to white for me this is useful because i can turn light grays on buttons to white and the dark grays to black and thereby preserve the d effect on buttons and other parts of the window if you understood my description can you tell me if another less buggy program can do this as well much thanks for any help signature david delgreco what lies behind us and what lies technically a writer before us are tiny matters compared delgreco rahul net to what lies within us oliver wendell holmes david f delgreco delgreco rahul net recommendation for screen capture program
diff --git a/dataless-classifier/data/hierarchies/20newsgroups/idToLabelNameMap.txt b/dataless-classifier/data/hierarchies/20newsgroups/idToLabelNameMap.txt
new file mode 100644
index 000000000..48034a682
--- /dev/null
+++ b/dataless-classifier/data/hierarchies/20newsgroups/idToLabelNameMap.txt
@@ -0,0 +1,26 @@
+politics politics
+religion religion
+computer computer
+autos.sports autos.sports
+science science
+sales sales
+talk.politics.guns talk.politics.guns
+talk.politics.mideast talk.politics.mideast
+talk.politics.misc talk.politics.misc
+alt.atheism alt.atheism
+soc.religion.christian soc.religion.christian
+talk.religion.misc talk.religion.misc
+comp.sys.ibm.pc.hardware comp.sys.ibm.pc.hardware
+comp.sys.mac.hardware comp.sys.mac.hardware
+comp.graphics comp.graphics
+comp.windows.x comp.windows.x
+comp.os.ms.windows.misc comp.os.ms.windows.misc
+rec.autos rec.autos
+rec.motorcycles rec.motorcycles
+rec.sport.baseball rec.sport.baseball
+rec.sport.hockey rec.sport.hockey
+sci.electronics sci.electronics
+sci.crypt sci.crypt
+sci.med sci.med
+sci.space sci.space
+misc.forsale misc.forsale
diff --git a/dataless-classifier/data/hierarchies/20newsgroups/labelDesc_Kws_embellished.txt b/dataless-classifier/data/hierarchies/20newsgroups/labelDesc_Kws_embellished.txt
new file mode 100644
index 000000000..c2b10da6f
--- /dev/null
+++ b/dataless-classifier/data/hierarchies/20newsgroups/labelDesc_Kws_embellished.txt
@@ -0,0 +1,26 @@
+politics politics gun fbi guns weapon compound israel arab jews jewish muslim gay homosexual sexual
+religion religion atheist christian atheism god islamic christian god christ church bible jesus christian morality jesus god religion horus
+computer computer bus pc motherboard bios board computer dos mac apple powerbook graphics image gif animation tiff window motif xterm sun windows windows dos microsoft ms driver drivers card printer
+autos.sports autos.sports car ford auto toyota honda nissan bmw bike motorcycle yamaha baseball ball hitter hockey wings espn
+science science circuit electronics radio signal battery encryption key crypto algorithm security doctor medical disease medicine patient space orbit moon earth sky solar
+sales sales sale offer shipping forsale sell price brand obo
+talk.politics.guns gun fbi guns weapon compound
+talk.politics.mideast israel arab jews jewish muslim
+talk.politics.misc gay homosexual sexual
+alt.atheism atheist christian atheism god islamic
+soc.religion.christian christian god christ church bible jesus
+talk.religion.misc christian morality jesus god religion horus
+comp.sys.ibm.pc.hardware bus pc motherboard bios board computer dos
+comp.sys.mac.hardware mac apple powerbook
+comp.graphics graphics image gif animation tiff
+comp.windows.x window motif xterm sun windows
+comp.os.ms.windows.misc windows dos microsoft ms driver drivers card printer
+rec.autos car ford auto toyota honda nissan bmw
+rec.motorcycles bike motorcycle yamaha
+rec.sport.baseball baseball ball hitter
+rec.sport.hockey hockey wings espn
+sci.electronics circuit electronics radio signal battery
+sci.crypt encryption key crypto algorithm security
+sci.med doctor medical disease medicine patient
+sci.space space orbit moon earth sky solar
+misc.forsale sale offer shipping forsale sell price brand obo
diff --git a/dataless-classifier/data/hierarchies/20newsgroups/labelDesc_Kws_simple.txt b/dataless-classifier/data/hierarchies/20newsgroups/labelDesc_Kws_simple.txt
new file mode 100644
index 000000000..0c0a7ce7b
--- /dev/null
+++ b/dataless-classifier/data/hierarchies/20newsgroups/labelDesc_Kws_simple.txt
@@ -0,0 +1,26 @@
+politics politics politics guns politics mideast politics
+religion religion atheism society religion christianity christian religion
+computer computer computer systems ibm pc hardware computer systems mac macintosh apple hardware computer graphics computer windows x windowsx computer os operating system microsoft windows
+autos.sports autos.sports cars motorcycles baseball hockey
+science science science electronics science cryptography medicine science space
+sales sales for sale discount
+talk.politics.guns politics guns
+talk.politics.mideast politics mideast
+talk.politics.misc politics
+alt.atheism atheism
+soc.religion.christian society religion christianity christian
+talk.religion.misc religion
+comp.sys.ibm.pc.hardware computer systems ibm pc hardware
+comp.sys.mac.hardware computer systems mac macintosh apple hardware
+comp.graphics computer graphics
+comp.windows.x computer windows x windowsx
+comp.os.ms.windows.misc computer os operating system microsoft windows
+rec.autos cars
+rec.motorcycles motorcycles
+rec.sport.baseball baseball
+rec.sport.hockey hockey
+sci.electronics science electronics
+sci.crypt science cryptography
+sci.med science medicine
+sci.space science space
+misc.forsale for sale discount
diff --git a/dataless-classifier/data/hierarchies/20newsgroups/parentChildIdMap.txt b/dataless-classifier/data/hierarchies/20newsgroups/parentChildIdMap.txt
new file mode 100644
index 000000000..5c49b8bbd
--- /dev/null
+++ b/dataless-classifier/data/hierarchies/20newsgroups/parentChildIdMap.txt
@@ -0,0 +1,7 @@
+politics religion computer autos.sports science sales
+politics talk.politics.guns talk.politics.mideast talk.politics.misc
+religion alt.atheism soc.religion.christian talk.religion.misc
+computer comp.sys.ibm.pc.hardware comp.sys.mac.hardware comp.graphics comp.windows.x comp.os.ms.windows.misc
+autos.sports rec.autos rec.motorcycles rec.sport.baseball rec.sport.hockey
+science sci.electronics sci.crypt sci.med sci.space
+sales misc.forsale
\ No newline at end of file
diff --git a/dataless-classifier/pom.xml b/dataless-classifier/pom.xml
new file mode 100644
index 000000000..77d00b615
--- /dev/null
+++ b/dataless-classifier/pom.xml
@@ -0,0 +1,71 @@
+
+
+
+ illinois-cogcomp-nlp
+ edu.illinois.cs.cogcomp
+ 4.0.0
+
+
+ 4.0.0
+
+ illinois-datalessclassification
+ Illinois Dataless Classifier
+ Classifies Text into the given label hierarchy from just the textual label descriptions
+
+
+
+ org.cogcomp
+ cogcomp-datastore
+ 1.9.10
+
+
+ edu.illinois.cs.cogcomp
+ illinois-core-utilities
+ 4.0.0
+
+
+ edu.illinois.cs.cogcomp
+ illinois-tokenizer
+ 4.0.0
+
+
+ org.slf4j
+ slf4j-log4j12
+ 1.7.12
+ true
+
+
+ net.sf.jung
+ jung-api
+ 2.0.1
+
+
+ net.sf.jung
+ jung-graph-impl
+ 2.0.1
+
+
+ commons-cli
+ commons-cli
+ 1.4
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 2.20.1
+
+
+ -Xmx15g
+
+
+
+
+
+
+
+
diff --git a/dataless-classifier/script/testESADataless.sh b/dataless-classifier/script/testESADataless.sh
new file mode 100644
index 000000000..f2f8520de
--- /dev/null
+++ b/dataless-classifier/script/testESADataless.sh
@@ -0,0 +1,3 @@
+#mvn compile
+#mvn dependency:copy-dependencies
+nice java -Xmx10g -cp ./target/*:./target/dependency/* edu.illinois.cs.cogcomp.datalessclassification.ta.ESADatalessAnnotator $@
diff --git a/dataless-classifier/script/testW2VDataless.sh b/dataless-classifier/script/testW2VDataless.sh
new file mode 100644
index 000000000..f7bda7662
--- /dev/null
+++ b/dataless-classifier/script/testW2VDataless.sh
@@ -0,0 +1,3 @@
+#mvn compile
+#mvn dependency:copy-dependencies
+nice java -Xmx10g -cp ./target/*:./target/dependency/* edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator $@
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/AClassifierTree.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/AClassifierTree.java
new file mode 100755
index 000000000..6c4db0a32
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/AClassifierTree.java
@@ -0,0 +1,214 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.classifier;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import edu.illinois.cs.cogcomp.datalessclassification.hierarchy.SimpleTree;
+import edu.illinois.cs.cogcomp.datalessclassification.hierarchy.TreeNode;
+
+/**
+ * An Abstract Tree class that has the same link structure as a {@link LabelTree}.
+ *
+ * Ideally, different classes will extend this class with different types of Nodes, wherein their corresponding Node
+ * will contain the additional payload (data) required by their corresponding classifier.
+ *
+ * For instance, a simple nearest-neighbor based Dataless Classifier requires a Tree wherein each node contains the label
+ * representation, which is achieved by extending this class ({@link ConceptTree}), and the corresponding Node class {@link ConceptTreeNode}).
+ *
+ * @author shashank
+ */
+
+public abstract class AClassifierTree extends SimpleTree {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Label ID of the root node, taken from the root of the attached label tree. */
+    protected String root_label;
+
+    /** The label tree whose link structure this tree replicates. */
+    protected LabelTree labelTree;
+
+    /**
+     * Attaches the given label tree and then replicates its parent/child links.
+     *
+     * @param labelTree the label hierarchy to mirror
+     */
+    public AClassifierTree(LabelTree labelTree) {
+        super();
+        setLabelTree(labelTree);
+        initializeTreeStructure();
+    }
+
+    /**
+     * @return {@code true} once a label tree has been attached
+     */
+    protected boolean isLabelTreeInitialized() {
+        return labelTree != null;
+    }
+
+    /**
+     * Creates a basic node for the given label ID and installs it as the root.
+     *
+     * @param root_label label ID of the root node
+     */
+    @SuppressWarnings("unchecked")
+    protected void initializeRoot(String root_label) {
+        initializeRoot((T) T.makeBasicNode(root_label));
+    }
+
+    /**
+     * @return the label ID of the root node
+     */
+    public String getRootLabel() {
+        return root_label;
+    }
+
+    /**
+     * Attaches the label tree and initializes the root from it. Does nothing when a label
+     * tree is already attached.
+     *
+     * @param labelTree the label hierarchy to attach
+     */
+    protected void setLabelTree(LabelTree labelTree) {
+        if (!isLabelTreeInitialized()) {
+            // The root is created first; the field is assigned last, as in the original order.
+            this.root_label = labelTree.getRoot().getLabelID();
+            initializeRoot(root_label);
+            this.labelTree = labelTree;
+        }
+    }
+
+    /**
+     * @return the attached label tree
+     */
+    public LabelTree getLabelTree() {
+        return labelTree;
+    }
+
+    /**
+     * Looks up the children of the node with the given label ID.
+     *
+     * @param label label ID of the parent node
+     * @return {@code null} if the node-based lookup returns {@code null}, an empty set if the
+     *         node has no children, otherwise a fresh set holding the child nodes
+     */
+    @SuppressWarnings("unchecked")
+    public Set getChildren(String label) {
+        Set children = getChildren((T) T.makeBasicNode(label));
+
+        if (children == null) {
+            return null;
+        }
+
+        if (children.isEmpty()) {
+            return Collections.emptySet();
+        }
+
+        Set copy = new HashSet(children.size());
+        copy.addAll(children);
+        return copy;
+    }
+
+    /**
+     * @param label label ID of the child node
+     * @return the parent node as reported by the node-based lookup
+     */
+    @SuppressWarnings("unchecked")
+    public T getParent(String label) {
+        return getParent((T) T.makeBasicNode(label));
+    }
+
+    /**
+     * Adds an edge between the nodes identified by the two label IDs.
+     *
+     * @param parent label ID of the parent node
+     * @param child label ID of the child node
+     * @return the result of the node-based {@code addEdge}
+     */
+    @SuppressWarnings("unchecked")
+    public boolean addEdge(String parent, String child) {
+        return addEdge((T) T.makeBasicNode(parent), (T) T.makeBasicNode(child));
+    }
+
+    /**
+     * Adds one edge per child, stopping at the first edge that cannot be added.
+     *
+     * @param parent label ID of the parent node
+     * @param children label IDs of the child nodes
+     * @return {@code false} as soon as one edge fails, {@code true} otherwise (also for an
+     *         empty set)
+     */
+    public boolean addEdges(String parent, Set children) {
+        for (String child : children) {
+            if (!addEdge(parent, child)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * @return the leaf labels reported by the attached label tree
+     */
+    public Set getLeafLabels() {
+        return labelTree.getLeafLabels();
+    }
+
+    /**
+     * @param label label ID to test
+     * @return whether the node with this label ID is a leaf of this tree
+     */
+    @SuppressWarnings("unchecked")
+    public boolean isLeaf(String label) {
+        return isLeaf((T) T.makeBasicNode(label));
+    }
+
+    /**
+     * @param label label ID to look up
+     * @return the depth reported by the node-based {@code getDepth}
+     */
+    @SuppressWarnings("unchecked")
+    public int getDepth(String label) {
+        T probe = (T) T.makeBasicNode(label);
+        return getDepth(probe);
+    }
+
+    /**
+     * @param label label ID to look up
+     * @return a fresh list of the parent nodes returned by the node-based lookup, or
+     *         {@code null} if that lookup returns {@code null}
+     */
+    @SuppressWarnings("unchecked")
+    public List getAllParents(String label) {
+        List ancestors = getAllParents((T) T.makeBasicNode(label));
+
+        if (ancestors == null) {
+            return null;
+        }
+
+        return new ArrayList<>(ancestors);
+    }
+
+    /**
+     * @param label label ID to look up
+     * @return the label IDs of the nodes returned by {@link #getAllParents(String)}, in the
+     *         same order, or {@code null} if that call returns {@code null}
+     */
+    public List getAllParentLabels(String label) {
+        List ancestors = getAllParents(label);
+
+        if (ancestors == null) {
+            return null;
+        }
+
+        List ancestorLabels = new ArrayList<>(ancestors.size());
+
+        for (T ancestor : ancestors) {
+            ancestorLabels.add(ancestor.getLabelID());
+        }
+
+        return ancestorLabels;
+    }
+
+    /**
+     * @param label label ID to look up
+     * @return the node stored in this tree for the given label ID, as returned by
+     *         {@code getNode}
+     */
+    @SuppressWarnings("unchecked")
+    public T getNodeFromLabel(String label) {
+        return getNode((T) T.makeBasicNode(label));
+    }
+
+    /**
+     * Collects every node whose depth equals the depth of the given label.
+     *
+     * @param label label ID whose level is wanted
+     * @return {@code null} if the label's depth is reported as -1, otherwise the set of
+     *         nodes at that depth
+     */
+    public Set getSameLevelNodes(String label) {
+        int targetDepth = getDepth(label);
+
+        if (targetDepth == -1) {
+            return null;
+        }
+
+        Set sameLevel = new HashSet<>();
+
+        for (T candidate : getBreadthOrderedNodeList()) {
+            int candidateDepth = getDepth(candidate);
+
+            // Scanning stops at the first node that lies deeper than the requested level.
+            if (candidateDepth > targetDepth) {
+                break;
+            }
+
+            if (candidateDepth == targetDepth) {
+                sameLevel.add(candidate);
+            }
+        }
+
+        return sameLevel;
+    }
+
+    /**
+     * @param label label ID whose level is wanted
+     * @return the label IDs of the nodes returned by {@link #getSameLevelNodes(String)}, or
+     *         {@code null} if that call returns {@code null}
+     */
+    public Set getSameLevelLabels(String label) {
+        Set sameLevel = getSameLevelNodes(label);
+
+        if (sameLevel == null) {
+            return null;
+        }
+
+        Set labels = new HashSet<>(sameLevel.size());
+
+        for (T member : sameLevel) {
+            labels.add(member.getLabelID());
+        }
+
+        return labels;
+    }
+
+    /**
+     * Replicates the attached label tree's links: for every non-leaf label, in the label
+     * tree's breadth order, an edge is added to each of its children.
+     */
+    public void initializeTreeStructure() {
+        List orderedLabels = labelTree.getBreadthOrderedLabelList();
+
+        for (String current : orderedLabels) {
+            if (labelTree.isLeaf(current)) {
+                continue;
+            }
+
+            addEdges(current, labelTree.getChildren(current));
+        }
+    }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/ConceptTree.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/ConceptTree.java
new file mode 100755
index 000000000..8bb016aff
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/ConceptTree.java
@@ -0,0 +1,362 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.classifier;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import edu.illinois.cs.cogcomp.datalessclassification.representation.AEmbedding;
+import edu.illinois.cs.cogcomp.datalessclassification.util.SparseVector;
+import edu.illinois.cs.cogcomp.datalessclassification.util.SparseVectorOperations;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A ConceptTree has the same link structure as a {@link LabelTree}, with the addition that
+ * it contains vector representations for each node (labelID).
+ *
+ * @author yqsong@illinois.edu
+ * @author shashank
+ */
+
+public class ConceptTree extends AClassifierTree> {
+
+ private static Logger logger = LoggerFactory.getLogger(ConceptTree.class);
+
+ private static final long serialVersionUID = 1L;
+
+ private final static String NIL = "NIL";
+
+ private transient Map globalConceptWeights;
+
+ private transient AEmbedding embedding;
+
+ protected transient int numConcepts;
+
+ public ConceptTree(LabelTree labelTree) {
+ super(labelTree);
+ }
+
+ public ConceptTree(LabelTree labelTree, AEmbedding embedding, Map conceptWeights) {
+ this(labelTree, embedding, conceptWeights, 500);
+ }
+
+ public ConceptTree(LabelTree labelTree, AEmbedding embedding, Map conceptWeights,
+ int embeddingSize) {
+ super(labelTree);
+ this.embedding = embedding;
+ this.globalConceptWeights = conceptWeights;
+ this.numConcepts = embeddingSize;
+
+ initializeTree();
+ }
+
+ /**
+ * Generates and Returns a ConceptTree using the provided LabelTree, and the
+ * LabelID -> Embeddings Map
+ */
+ public static ConceptTree generateDenseEmbeddedTreeFromLabelEmbeddingMap(
+ LabelTree labelTree, Map> labelEmbeddings) {
+ ConceptTree conceptTree = new ConceptTree<>(labelTree);
+
+ for (ConceptTreeNode node : conceptTree.getNodes()) {
+ String labelID = node.getLabelID();
+ node.setLabelDescription(labelTree.getLabelDescription(labelID));
+
+ SparseVector conceptVector = labelEmbeddings.get(labelID);
+ node.setConceptVector(conceptVector);
+ }
+
+ return conceptTree;
+ }
+
+ /**
+ * Generates and Returns a ConceptTree using the provided LabelTree, and the
+ * File containing the String representation of the LabelID -> Embeddings Map
+ */
+ public static ConceptTree generateDenseEmbeddedTreeFromFile(
+ LabelTree labelTree, String repFile) {
+ logger.info("Reading Label Embeddings from " + repFile);
+ File inputFile = new File(repFile);
+
+ Map> labelEmbeddings = new HashMap<>();
+
+ try(BufferedReader bf = new BufferedReader(new FileReader(inputFile))) {
+ String line;
+
+ while ((line = bf.readLine()) != null) {
+ line = line.trim();
+
+ if (line.length() == 0)
+ continue;
+
+ String[] tokens = line.trim().split("\t", 2);
+ String[] stringVec = tokens[1].split(" ");
+
+ String label = tokens[0].trim();
+
+ if (label.length() == 0)
+ continue;
+
+ Map scores = new HashMap<>();
+
+ int i = 0;
+
+ for (String dim : stringVec) {
+ scores.put(i, Double.parseDouble(dim));
+ i++;
+ }
+
+ SparseVector vec = new SparseVector<>(scores);
+
+ labelEmbeddings.put(label, vec);
+ }
+
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ logger.error("File not found at " + repFile);
+ throw new RuntimeException("File not found at " + repFile);
+ } catch (IOException e) {
+ e.printStackTrace();
+ logger.error("Error while reading file");
+ throw new RuntimeException("Error while reading file");
+ }
+
+ ConceptTree conceptTree =
+ generateDenseEmbeddedTreeFromLabelEmbeddingMap(labelTree, labelEmbeddings);
+ return conceptTree;
+ }
+
+ /**
+ * Copy Constructor
+ */
+ public ConceptTree(ConceptTree thatTree) {
+ super(new LabelTree(thatTree.getLabelTree()));
+
+ for (ConceptTreeNode node : getNodes()) {
+ if (isRoot(node))
+ continue;
+
+ String labelID = node.getLabelID();
+ ConceptTreeNode thatNode = thatTree.getNodeFromLabel(labelID);
+
+ String description = thatNode.getLabelDescription();
+ node.setLabelDescription(description);
+
+ SparseVector vector = SparseVector.deepCopy(thatNode.getConceptVector());
+ node.setConceptVector(vector);
+ }
+ }
+
+
+ /**
+ * Initializes the Root node of the tree
+ */
+ @Override
+ protected void initializeRoot(String root_label) {
+ ConceptTreeNode root = ConceptTreeNode.makeBasicTypedNode(root_label);
+ initializeRoot(root);
+ }
+
+
+ /**
+ * Returns all the ChildNodes of a particular node (labelID)
+ */
+ @Override
+ public Set> getChildren(String label) {
+ Set> set =
+ getChildren(ConceptTreeNode.makeBasicTypedNode(label));
+
+ if (set == null)
+ return null;
+
+ if (set.isEmpty())
+ return Collections.emptySet();
+
+ Set> newSet = new HashSet<>(set.size());
+
+ newSet.addAll(set);
+
+ return newSet;
+ }
+
+
+ /**
+ * Returns the Parent Node of a particular node (labelID)
+ */
+ @Override
+ public ConceptTreeNode getParent(String label) {
+ ConceptTreeNode parent =
+ getParent(ConceptTreeNode.makeBasicTypedNode(label));
+ return parent;
+ }
+
+
+ /**
+ * Adds an edge between a ParentNode and a ChildNode
+ */
+ @Override
+ public boolean addEdge(String parent, String child) {
+ ConceptTreeNode parentNode = ConceptTreeNode.makeBasicTypedNode(parent);
+ ConceptTreeNode childNode = ConceptTreeNode.makeBasicTypedNode(child);
+
+ return addEdge(parentNode, childNode);
+ }
+
+ /**
+ * Returns the depth of a particular node (labelID)
+ */
+ public int getDepth(String label) {
+ return getDepth(ConceptTreeNode.makeBasicTypedNode(label));
+ }
+
+ /**
+ * Returns all the parent nodes of a particular node (labelID)
+ */
+ public List> getAllParents(String label) {
+ List> parentNodes =
+ getAllParents(ConceptTreeNode.makeBasicTypedNode(label));
+
+ if (parentNodes == null)
+ return null;
+
+ List> parents = new ArrayList<>(parentNodes.size());
+
+ parents.addAll(parentNodes);
+
+ return parentNodes;
+ }
+
+ /**
+ * This function initializes the representations of the nodes using the LabelTree and the Embedding Objects
+ * -- Uses the LabelTree as the Tree Structure, and
+ * -- Uses the labelDescription of each node to get the corresponding vector representation
+ */
+ public void initializeTree() {
+ for (ConceptTreeNode node : getNodes()) {
+ String labelID = node.getLabelID();
+ String description = labelTree.getLabelDescription(labelID);
+
+ node.setLabelDescription(description);
+
+ SparseVector concepts =
+ embedding.getVector(node.getLabelDescription(), numConcepts);
+ concepts.updateNorm(globalConceptWeights);
+
+ node.setConceptVector(concepts);
+ }
+ }
+
+ /**
+ * This Utility function takes multiple ConceptTrees as input, and returns a ConceptTree
+ * that averages the representations at each node.
+ */
+ public static ConceptTree getAvgConceptTree(
+ List> conceptTreeList) {
+ ConceptTree avgTree = new ConceptTree<>(conceptTreeList.get(0));
+
+ for (ConceptTreeNode node : avgTree.getNodes()) {
+ if (avgTree.isRoot(node))
+ continue;
+
+ String currentLabelID = node.getLabelID();
+ List> vectors = new ArrayList<>();
+
+ for (ConceptTree tree : conceptTreeList) {
+ vectors.add(tree.getNodeFromLabel(currentLabelID).getConceptVector());
+ }
+
+ SparseVector avgVector = SparseVectorOperations.averageMultipleVectors(vectors);
+ node.setConceptVector(avgVector);
+ }
+
+ return avgTree;
+ }
+
+ /**
+ * This Utility function dumps a text representation of the tree to the disk.
+ */
+ public void dumpTreeAsString(String filePath) {
+ try(FileWriter writer = new FileWriter(filePath)) {
+
+ List> nodeList = getBreadthOrderedNodeList();
+
+ for (ConceptTreeNode node : nodeList) {
+ String parent;
+
+ if (isRoot(node))
+ parent = NIL;
+ else
+ parent = getParent(node).getLabelID();
+
+ writer.write(parent + "\t" + node.getLabelID() + "\t" + node.getLabelDescription()
+ + "\t" + node.getConceptVector().toString() + "\n");
+ }
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ logger.error("Error writing to file at " + filePath);
+ throw new RuntimeException("Error writing to file at " + filePath);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ /**
+ * Reads and returns a serialized ConceptTree from a file
+ */
+ public static ConceptTree loadTree(String labelRepFile) {
+ try(ObjectInputStream in = new ObjectInputStream(new FileInputStream(labelRepFile))) {
+ ConceptTree tree = (ConceptTree) in.readObject();
+ return tree;
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ logger.error("File not found at " + labelRepFile);
+ throw new RuntimeException("File not found at " + labelRepFile);
+ } catch (IOException e) {
+ e.printStackTrace();
+ logger.error("Error reading from file");
+ throw new RuntimeException("Error reading from file");
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ logger.error("Error deserializing the ConceptTree");
+ throw new RuntimeException("Error deserializing the ConceptTree");
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("");
+
+ List> nodes = getBreadthOrderedNodeList();
+
+ for (ConceptTreeNode node : nodes) {
+ if (!isLeaf(node)) {
+ for (ConceptTreeNode child : getChildren(node)) {
+ sb.append(node.getLabelID()).append("\t");
+ sb.append(child).append("\n");
+ }
+ }
+ }
+
+ return sb.toString();
+ }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/ConceptTreeNode.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/ConceptTreeNode.java
new file mode 100755
index 000000000..71bd9f197
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/ConceptTreeNode.java
@@ -0,0 +1,122 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.classifier;
+
+import java.io.Serializable;
+
+import edu.illinois.cs.cogcomp.datalessclassification.hierarchy.TreeNode;
+import edu.illinois.cs.cogcomp.datalessclassification.util.SparseVector;
+
+/**
+ *
+ * The Node Class used by {@link ConceptTree} internally.
+ * Wraps labelDescription and vector representation of the node within it.
+ *
+ * ConceptTreeNode is to {@link ConceptTree}, as {@link LabelTreeNode} is to {@link LabelTree}
+ *
+ * Each ConceptTreeNode contains the vector representation for a node (labelID) in the LabelTree.
+ *
+ * @author yqsong@illinois.edu
+ * @author shashank
+ */
+
+public class ConceptTreeNode extends TreeNode {
+
+ private static final long serialVersionUID = 1L;
+
+ private String labelDescription;
+ private SparseVector conceptVector;
+
+ /**
+ * A convenience factory function to create a basic ConceptTreeNode
+ */
+ public static ConceptTreeNode makeBasicTypedNode(String labelID) {
+ ConceptTreeNode node = new ConceptTreeNode<>("", labelID, null);
+ return node;
+ }
+
+ /**
+ * A convenience factory function to create a ConceptTreeNode
+ */
+ public static ConceptTreeNode makeNode(String labelDesc,
+ String labelID, SparseVector conceptVector) {
+ ConceptTreeNode node = new ConceptTreeNode<>(labelDesc, labelID, conceptVector);
+ return node;
+ }
+
+ public ConceptTreeNode(String labelID) {
+ this("", labelID);
+ }
+
+ public ConceptTreeNode(String labelDesc, String labelID) {
+ this(labelDesc, labelID, null);
+ }
+
+ public ConceptTreeNode(String labelDesc, String labelID, SparseVector conceptVector) {
+ super(labelID);
+ setLabelDescription(labelDesc);
+ setConceptVector(conceptVector);
+ }
+
+ /**
+ * Returns the labelDescription of the node
+ */
+ public String getLabelDescription() {
+ return this.labelDescription;
+ }
+
+ /**
+ * Sets the labelDescription of the node
+ */
+ public void setLabelDescription(String labelDesc) {
+ this.labelDescription = labelDesc;
+ }
+
+ /**
+ * Returns the vector representation of the node
+ */
+ public SparseVector getConceptVector() {
+ return this.conceptVector;
+ }
+
+ /**
+ * Sets the vector representation of the node; a null vector is replaced by a new, empty SparseVector
+ */
+ public void setConceptVector(SparseVector vector) {
+ if (vector == null)
+ vector = new SparseVector<>();
+
+ this.conceptVector = vector;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof ConceptTreeNode>))
+ return false;
+
+ ConceptTreeNode other = (ConceptTreeNode) o;
+
+ return this.labelID.equals(other.getLabelID());
+ }
+
+ @Override
+ public int hashCode() {
+ return labelID.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("");
+
+ sb.append(labelID).append("\t");
+ sb.append(conceptVector);
+
+ return sb.toString();
+ }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/DatalessClassifierML.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/DatalessClassifierML.java
new file mode 100755
index 000000000..4e7aadc14
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/DatalessClassifierML.java
@@ -0,0 +1,362 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.classifier;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
+import edu.illinois.cs.cogcomp.datalessclassification.config.DatalessConfigurator;
+import edu.illinois.cs.cogcomp.datalessclassification.util.HashSort;
+import edu.illinois.cs.cogcomp.datalessclassification.util.LabelScorePair;
+import edu.illinois.cs.cogcomp.datalessclassification.util.LabelResultTree;
+import edu.illinois.cs.cogcomp.datalessclassification.util.LabelResultTreeNode;
+import edu.illinois.cs.cogcomp.datalessclassification.util.SparseVector;
+import edu.illinois.cs.cogcomp.datalessclassification.util.SparseVectorOperations;
+
+/**
+ * The class which implements various Inference algorithms for the Multi-Label Hierarchical Dataless Classification.
+ * - Supports both Bottom-Up and Top-Down inference
+ * - Provides support for controlling the minimum and maximum number of labels selected at each level
+ * - Provides support for controlling the number of labels selected at a level based on the cumulative similarity score
+ * - Provides functions to retrieve just a flat-list of selected labels, or the full depth-level classification information
+ *
+ * @author yqsong@illinois.edu
+ * @author shashank
+ */
+public class DatalessClassifierML implements IConceptClassificationTree {
+ private ConceptTree conceptTree;
+
+ private boolean bottomUp;
+
+ private double classifierThreshold;
+ private int classifierLeastK;
+ private int classifierMaxK;
+
+ public DatalessClassifierML(ResourceManager config, ConceptTree conceptTree) {
+ this.conceptTree = conceptTree;
+
+ this.bottomUp = config.getBoolean(DatalessConfigurator.BottomUp_Inference.key);
+ this.classifierThreshold = config.getDouble(DatalessConfigurator.classifierThreshold.key);
+ this.classifierLeastK = config.getInt(DatalessConfigurator.classifierLeastK.key);
+ this.classifierMaxK = config.getInt(DatalessConfigurator.classifierMaxK.key);
+ }
+
+ @Override
+ public Map> getFullDepthPredictions(SparseVector docVector) {
+ return getFullDepthPredictions(docVector, new HashMap<>());
+ }
+
+ /**
+ * Overload of getFullDepthPredictions that additionally accepts weights (conceptWeights) for the different dimensions
+ * of the underlying embedding; dispatches to the bottom-up or the top-down inference depending on the bottomUp flag
+ */
+ public Map> getFullDepthPredictions(SparseVector docVector,
+ Map conceptWeights) {
+ if (bottomUp)
+ return getFullDepthPredictionsBottomUp(docVector, conceptWeights);
+ else
+ return getFullDepthPredictionsTopDown(docVector, conceptWeights);
+ }
+
+    /**
+     * Bottom-up inference: walks the leaf nodes in HashSort.sortByValues order, selecting a leaf while the cumulative
+     * normalized score is below classifierThreshold and fewer than classifierMaxK leaves were visited (the first
+     * classifierLeastK are always taken), and emits every node on a selected leaf's path to the root with that leaf's score.
+     * Returns a Map, where key is the depth, and value is a list of selected labelIDs at that depth with their absolute similarity scores
+     */
+    private Map> getFullDepthPredictionsBottomUp(SparseVector docVector, Map conceptWeights) {
+        double classifierMLThreshold = classifierThreshold;
+        int leastK = classifierLeastK;
+        int maxK = classifierMaxK;
+
+        Set> leafSet = conceptTree.getLeafSet();
+
+        Map orgSimilarities = new HashMap<>();
+        Map normalizedSimilarities = new HashMap<>();
+
+        /**
+         * We calculate normalized similarities so as to be able to threshold at a particular
+         * (absolute) value while selecting the labels
+         */
+        double maxSimilarity = 0 - Double.MAX_VALUE;
+        double minSimilarity = Double.MAX_VALUE;
+
+        for (ConceptTreeNode leafNode : leafSet) {
+            double similarity =
+                    SparseVectorOperations.cosine(leafNode.getConceptVector(), docVector,
+                            conceptWeights);
+            orgSimilarities.put(leafNode.getLabelID(), similarity);
+
+            if (similarity > maxSimilarity) {
+                maxSimilarity = similarity;
+            }
+
+            if (similarity < minSimilarity) {
+                minSimilarity = similarity;
+            }
+        }
+
+        // Shift every score by the (negative) minimum so that all similarities become non-negative. The bounds
+        // are adjusted once, after the loop, so that every label is shifted by the same amount.
+        if (minSimilarity < 0) {
+            for (String labelID : orgSimilarities.keySet()) {
+                orgSimilarities.put(labelID, orgSimilarities.get(labelID) - minSimilarity);
+            }
+            maxSimilarity = maxSimilarity - minSimilarity;
+            minSimilarity = 0;
+        }
+
+        double sumSimilarity = 0;
+
+        for (String leafLabel : orgSimilarities.keySet()) {
+            double value =
+                    (orgSimilarities.get(leafLabel) - minSimilarity)
+                            / (maxSimilarity - minSimilarity + Double.MIN_VALUE);
+
+            if (orgSimilarities.size() == 1) {
+                value = 1;
+            }
+
+            normalizedSimilarities.put(leafLabel, value);
+            sumSimilarity += value;
+        }
+
+        for (String leafLabel : normalizedSimilarities.keySet()) {
+            normalizedSimilarities.put(leafLabel, normalizedSimilarities.get(leafLabel)
+                    / (sumSimilarity + Double.MIN_VALUE));
+        }
+
+        Map> depthLabelMap = new HashMap<>();
+
+        TreeMap sortedSimilarities = HashSort.sortByValues(normalizedSimilarities);
+
+        double ratio = 0;
+        int labelCount = 0;
+
+        /**
+         * Basically the portion of the code below selects certain leaf nodes (either by similarity
+         * threshold or by topK), and selects their path to the root in the tree -- with their
+         * scores being used as the scores of their leaf nodes.
+         */
+        for (String leafLabelID : sortedSimilarities.keySet()) {
+            ratio += normalizedSimilarities.get(leafLabelID);
+
+            if ((ratio < classifierMLThreshold && labelCount < maxK) || labelCount < leastK) {
+                String labelID = leafLabelID;
+                double leafSimilarity = orgSimilarities.get(leafLabelID);
+
+                while (labelID != null) {
+                    int depth = conceptTree.getDepth(labelID);
+
+                    if (!depthLabelMap.containsKey(depth)) {
+                        depthLabelMap.put(depth, new ArrayList<>());
+                    }
+
+                    LabelScorePair labelPair = new LabelScorePair(labelID, leafSimilarity);
+                    depthLabelMap.get(depth).add(labelPair);
+
+                    labelID = conceptTree.getLabelTree().getParent(labelID);
+                }
+            }
+
+            labelCount++;
+        }
+
+        return depthLabelMap;
+    }
+
+ /**
+ * Gets the DepthPredictions (using either bottomUp or topDown) and then just returns
+ * a flat-bag of selected labelIDs (independent of their depth in the tree)
+ *
+ * Use this function if you just want a flat-list of selected labelIDs from the tree, where only topK labels
+ * have been selected at each level
+ */
+ @Override
+ public Set getFlatPredictions(SparseVector docVector, int topK) {
+ Map> testDepthLabelMap = getPrunedDepthPredictions(docVector, topK);
+
+ Set predictedLabels = new HashSet<>();
+
+ for (Set labels : testDepthLabelMap.values()) {
+ predictedLabels.addAll(labels);
+ }
+
+ return predictedLabels;
+ }
+
+ /**
+ * Gets the FullPredictions (using either bottomUp or topDown) and then selects at most topK labels at each level
+ *
+ * Return a Map, where key is the Depth, and the value is the Set of selected labelIDs at the depth
+ *
+ * Use this function when you want the depth information associated with the selected labelIDs as well, and want to
+ * limit the number of labels selected at each depth
+ */
+ @Override
+ public Map> getPrunedDepthPredictions(SparseVector docVector, int topK) {
+ Map> testDepthLabelMap = new HashMap<>();
+
+ Map> labelResultsInDepth = getFullDepthPredictions(docVector);
+
+ for (int depth : labelResultsInDepth.keySet()) {
+ /**
+ TODO: This block assumes that Depth = 0 will always be the Root Node of the Tree
+ TODO: However, instead of the actual root node provided by the end-user, the underlying tree implementation might use a placeholder
+ TODO: for the root node, and thus this check might lead to some logical errors later.
+
+ TODO: Thus, this is very risky and needs to go once the labelTree and ConceptTree classes have been refactored
+ */
+ if (depth == 0)
+ continue;
+
+ List classifiedLabelList = labelResultsInDepth.get(depth);
+
+ if (classifiedLabelList == null) {
+ classifiedLabelList = new ArrayList<>();
+ }
+
+ Set classifiedLabelSet = new HashSet<>();
+
+ for (int i = 0; i < Math.min(topK, classifiedLabelList.size()); i++) {
+ //This check is currently required since labelIDs are represented as a String, and thus they might clash
+ if (!classifiedLabelSet.contains(classifiedLabelList.get(i).getLabelID()))
+ classifiedLabelSet.add(classifiedLabelList.get(i).getLabelID());
+ }
+
+ testDepthLabelMap.put(depth, classifiedLabelSet);
+ }
+
+ return testDepthLabelMap;
+ }
+
+ /**
+ * Selects at most K children per node, while traversing Top-Down in the tree.
+ */
+ private Map> getFullDepthPredictionsTopDown(SparseVector documentConceptVector, Map conceptWeights) {
+ LabelResultTree labelResult = new LabelResultTree();
+ LabelScorePair labelPair = new LabelScorePair(conceptTree.getRootLabel(), 1);
+
+ LabelResultTreeNode resultTreeRootNode = labelResult.getRootNode();
+
+ resultTreeRootNode.setLabelScorePair(labelPair);
+ resultTreeRootNode.setDepth(0);
+
+ retrieveLabelTopDown(documentConceptVector, conceptTree.getRoot(), resultTreeRootNode, conceptWeights);
+
+ Map> labelResultsInDepth = labelResult.getFullDepthPredictions();
+ return labelResultsInDepth;
+ }
+
+    /**
+     * Recursive function
+     *
+     * Given a node of the ConceptTree, scores its children against the document vector, examines at most classifierMaxK
+     * of them (in HashSort.sortByValues order), attaches those with a positive normalized score to resultTreeRootNode and recurses.
+     */
+    private void retrieveLabelTopDown(SparseVector docConceptVector,
+            ConceptTreeNode conceptTreeRootNode, LabelResultTreeNode resultTreeRootNode,
+            Map conceptWeights) {
+        int maxK = classifierMaxK;
+
+        Map orgSimilarities = new HashMap<>();
+        Map similarities = new HashMap<>();
+        Map> labelIdNodeMap = new HashMap<>();
+
+        double maxSimilarity = 0 - Double.MAX_VALUE;
+        double minSimilarity = Double.MAX_VALUE;
+
+        for (ConceptTreeNode childNode : conceptTree.getChildren(conceptTreeRootNode)) {
+            double similarity =
+                    SparseVectorOperations.cosine(docConceptVector, childNode.getConceptVector(),
+                            conceptWeights);
+
+            orgSimilarities.put(childNode.getLabelID(), similarity);
+            labelIdNodeMap.put(childNode.getLabelID(), childNode);
+
+            if (similarity > maxSimilarity) {
+                maxSimilarity = similarity;
+            }
+
+            if (similarity < minSimilarity) {
+                minSimilarity = similarity;
+            }
+        }
+
+        // Shift every score by the (negative) minimum so that all similarities become non-negative. The bounds
+        // are adjusted once, after the loop, so that every label is shifted by the same amount.
+        if (minSimilarity < 0) {
+            for (String labelID : orgSimilarities.keySet()) {
+                orgSimilarities.put(labelID, orgSimilarities.get(labelID) - minSimilarity);
+            }
+            maxSimilarity = maxSimilarity - minSimilarity;
+            minSimilarity = 0;
+        }
+
+        double sumSimilarity = 0;
+
+        for (String labelID : orgSimilarities.keySet()) {
+            double value =
+                    (orgSimilarities.get(labelID) - minSimilarity)
+                            / (maxSimilarity - minSimilarity + Double.MIN_VALUE);
+
+            if (orgSimilarities.size() == 1) {
+                value = 1;
+            }
+
+            similarities.put(labelID, value);
+            sumSimilarity += value;
+        }
+
+        for (String labelID : similarities.keySet()) {
+            similarities.put(labelID, similarities.get(labelID) / (sumSimilarity + Double.MIN_VALUE));
+        }
+
+        if (conceptTree.getChildCount(conceptTreeRootNode) == 0) {
+            resultTreeRootNode.setIsLeaf(true);
+        } else {
+            resultTreeRootNode.setIsLeaf(false);
+        }
+
+        TreeMap sortedSimilarities = HashSort.sortByValues(similarities);
+
+        int labelCount = 0;
+        List labelResultChildNodes = resultTreeRootNode.getChildren();
+
+        if (sumSimilarity > 0) {
+            for (String labelID : sortedSimilarities.keySet()) {
+                if (labelCount < maxK && similarities.get(labelID) > 0) {
+                    LabelScorePair labelPair =
+                            new LabelScorePair(labelID, orgSimilarities.get(labelID));
+
+                    LabelResultTreeNode labelResultChildNode = new LabelResultTreeNode();
+                    labelResultChildNode.setLabelScorePair(labelPair);
+                    labelResultChildNode.setDepth(resultTreeRootNode.getDepth() + 1);
+
+                    labelResultChildNodes.add(labelResultChildNode);
+
+                    retrieveLabelTopDown(docConceptVector, labelIdNodeMap.get(labelID), labelResultChildNode,
+                            conceptWeights);
+                }
+
+                labelCount++;
+
+                if (labelCount >= maxK) {
+                    break;
+                }
+            }
+        }
+    }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/IConceptClassificationTree.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/IConceptClassificationTree.java
new file mode 100755
index 000000000..259ebafd1
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/IConceptClassificationTree.java
@@ -0,0 +1,50 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.classifier;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import edu.illinois.cs.cogcomp.datalessclassification.util.LabelScorePair;
+import edu.illinois.cs.cogcomp.datalessclassification.util.SparseVector;
+
+/**
+ * An Inference Interface, to be implemented by all variants of Dataless Classifier
+ *
+ * @author yqsong@illinois.edu
+ * @author shashank
+ */
+
+public interface IConceptClassificationTree {
+
+ /**
+ * Returns a Map, where key is the depth, and value is a list of selected labelIDs at that depth with their absolute similarity scores
+ *
+ * If a particular implementation wants the end-user to be able to select a particular inference algorithm, this function
+ * should internally redirect to the relevant functions
+ */
+ Map> getFullDepthPredictions(SparseVector vector);
+
+
+ /**
+ * Returns a Map, where key is the depth, and the value is the Set of selected topK labelIDs at that depth
+ *
+ * Should ideally call getFullDepthPredictions internally and select the topK labels at each depth.
+ *
+ * Use this function when you want the depth information associated with the selected labelIDs as well, and want to
+ * limit the number of labels selected at each depth
+ */
+ Map> getPrunedDepthPredictions(SparseVector docVector, int topK);
+
+ /**
+ * Returns just a flat-bag of selected labelIDs (independent of their depth in the tree); topK is intended as the per-depth limit
+ */
+ Set getFlatPredictions(SparseVector docVector, int topK);
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/LabelTree.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/LabelTree.java
new file mode 100755
index 000000000..431c28c57
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/LabelTree.java
@@ -0,0 +1,553 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.classifier;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import edu.illinois.cs.cogcomp.datalessclassification.hierarchy.SimpleTree;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class represents the user-desired label ontology/hierarchy, in which each node has a
+ * - labelID
+ * - labelName
+ * - labelDescription
+ *
+ * @author shashank
+ */
+
+public class LabelTree extends SimpleTree {
+
+ private static Logger logger = LoggerFactory.getLogger(LabelTree.class);
+ private static final long serialVersionUID = 1L;
+
+ private String root_label;
+
+ public LabelTree() {
+ this("root");
+ }
+
+ public LabelTree(String root) {
+ super();
+ this.root_label = root;
+ initializeRoot(root_label);
+ }
+
+ /**
+ * Copy Constructor
+ */
+ public LabelTree(LabelTree thatTree) {
+ this(thatTree.root_label);
+
+ Set topNodes = thatTree.getChildren(thatTree.root_label);
+
+ Map> childMap = new HashMap<>();
+ Map labelNameMap = new HashMap<>();
+ Map labelDescriptionMap = new HashMap<>();
+
+ for (LabelTreeNode labelNode : thatTree.getNodes()) {
+ String labelID = labelNode.getLabelID();
+
+ if (!thatTree.isLeaf(labelNode)) {
+ childMap.put(labelID, thatTree.getChildren(labelID));
+ }
+
+ labelNameMap.put(labelID, labelNode.getLabelName());
+ labelDescriptionMap.put(labelID, labelNode.getLabelDescription());
+ }
+
+ initializeTree(topNodes, childMap, labelNameMap, labelDescriptionMap);
+ }
+
+ /**
+ * Initializes the root node of the tree
+ */
+ private void initializeRoot(String root_label) {
+ LabelTreeNode root = LabelTreeNode.makeBasicNode(root_label);
+ initializeRoot(root);
+ }
+
+ /**
+ * Initializes the Tree completely, given the:
+ * @param topNodes: Set containing the labelIDs of the top-level nodes in the tree
+ * @param childMap: Map containing the parentID - childIds mapping
+ * @param labelNameMap: Map containing the labelID - labelName mapping
+ * @param labelDescriptionMap: Map containing the labelID - labelDescription mapping
+ */
+ public void initializeTree(Set topNodes, Map> childMap,
+ Map labelNameMap, Map labelDescriptionMap) {
+
+ initializeTreeStructure(topNodes, childMap);
+ initializeLabelNames(labelNameMap);
+ initializeLabelDescriptions(labelDescriptionMap);
+ }
+
+ /**
+ * Initializes the structure of the Tree, given the:
+ * @param topNodes: Set containing the labelIDs of the top-level nodes in the tree
+ * @param childMap: Map containing the parentID - childIds mapping
+ */
+ public void initializeTreeStructure(Set topNodes, Map> childMap) {
+ List exploreNodes = new ArrayList<>();
+
+ for (String topNode : topNodes)
+ addEdge(root_label, topNode);
+
+ exploreNodes.addAll(topNodes);
+
+ while (exploreNodes.size() != 0) {
+ String node = exploreNodes.get(0);
+
+ if (childMap.containsKey(node)) {
+ Set children = childMap.get(node);
+ addEdges(node, children);
+ exploreNodes.addAll(children);
+ }
+
+ exploreNodes.remove(0);
+ }
+ }
+
+ /**
+ * Clears the labelNames for all the nodes
+ */
+ public void clearLabelNames() {
+ for (LabelTreeNode node : getNodes())
+ node.setLabelName("");
+ }
+
+ /**
+ * Sets the labelNames from the labelID -> labelName map
+ */
+ public void initializeLabelNames(Map labelNameMap) {
+ for (String labelId : labelNameMap.keySet())
+ setLabelName(labelId, labelNameMap.get(labelId));
+ }
+
+ /**
+ * Clears the labelDescriptions for all the nodes
+ */
+ public void clearLabelDescriptions() {
+ for (LabelTreeNode node : getNodes())
+ node.setLabelDescription("");
+ }
+
+ /**
+ * Sets the labelDescriptions from the labelID -> labelDescription map
+ */
+ public void initializeLabelDescriptions(Map labelDescriptionMap) {
+ for (String labelId : labelDescriptionMap.keySet())
+ setLabelDescription(labelId, labelDescriptionMap.get(labelId));
+ }
+
+ /**
+ * Adds a new node to the Tree, given the:
+ * @param parent: the labelId of the parent (the parent node should exist before adding the children)
+ * @param labelId: the labelId of the node
+ * @param labelName: the labelName of the node
+ * @param labelDesc: the labelDescription of the node
+ */
+ protected boolean addNode(String parent, String labelId, String labelName, String labelDesc) {
+ LabelTreeNode node = new LabelTreeNode(labelId, labelName, labelDesc);
+ LabelTreeNode parentNode = LabelTreeNode.makeBasicNode(parent);
+
+ return addEdge(parentNode, node);
+ }
+
+ /**
+ * Returns the labelIDs of the children of a particular node (labelID)
+ */
+ public Set getChildren(String labelId) {
+ Set set = getChildren(LabelTreeNode.makeBasicNode(labelId));
+
+ if (set == null)
+ return null;
+
+ if (set.isEmpty())
+ return Collections.emptySet();
+
+ Set labelSet = new HashSet<>(set.size());
+
+ for (LabelTreeNode node : set) {
+ labelSet.add(node.getLabelID());
+ }
+
+ return labelSet;
+ }
+
+ /**
+ * Returns the labelIDs of the leaf nodes in the tree
+ */
+ public Set getLeafLabels() {
+ Set set = getLeafSet();
+
+ if (set == null)
+ return null;
+
+ if (set.isEmpty())
+ return Collections.emptySet();
+
+ Set newSet = new HashSet<>(set.size());
+
+ for (LabelTreeNode node : set) {
+ newSet.add(node.getLabelID());
+ }
+
+ return newSet;
+ }
+
+ /**
+ * Returns the labelID of a parent of a particular node (labelID)
+ */
+ public String getParent(String labelId) {
+ LabelTreeNode parent = getParent(LabelTreeNode.makeBasicNode(labelId));
+
+ if (parent == null)
+ return null;
+
+ return parent.getLabelID();
+ }
+
+ /**
+ * Returns the labelName of a particular node (labelID) in the tree
+ */
+ public String getLabelName(String labelId) {
+ LabelTreeNode node = getNode(LabelTreeNode.makeBasicNode(labelId));
+
+ if (node == null)
+ return null;
+
+ return node.getLabelName();
+ }
+
+ /**
+ * Sets the labelName of a particular node (labelID) in the tree
+ */
+ public boolean setLabelName(String labelId, String labelName) {
+ LabelTreeNode node = getNode(LabelTreeNode.makeBasicNode(labelId));
+
+ if (node == null)
+ return false;
+
+ node.setLabelName(labelName);
+ return true;
+ }
+
+ /**
+ * Returns the labelDescription of a particular node (labelID) in the tree
+ */
+ public String getLabelDescription(String labelId) {
+ LabelTreeNode node = getNode(LabelTreeNode.makeBasicNode(labelId));
+
+ if (node == null)
+ return null;
+
+ return node.getLabelDescription();
+ }
+
+ /**
+ * Sets the labelDescription of a particular node (labelID) in the tree
+ */
+ public boolean setLabelDescription(String labelId, String labelDesc) {
+ LabelTreeNode node = getNode(LabelTreeNode.makeBasicNode(labelId));
+
+ if (node == null)
+ return false;
+
+ node.setLabelDescription(labelDesc);
+ return true;
+ }
+
+ /**
+ * Adds an edge between a parentNode and a childNode
+ */
+ protected boolean addEdge(String parent, String child) {
+ LabelTreeNode parentNode = LabelTreeNode.makeBasicNode(parent);
+ LabelTreeNode childNode = LabelTreeNode.makeBasicNode(child);
+
+ return addEdge(parentNode, childNode);
+ }
+
+ /**
+ * Adds edges between a parent node and child nodes
+ */
+ protected boolean addEdges(String parent, Set children) {
+ boolean success = true;
+
+ for (String child : children) {
+ success = addEdge(parent, child);
+
+ if (!success)
+ break;
+ }
+
+ return success;
+ }
+
+ /**
+ * Returns the Depth of a particular node (labelID) in the tree
+ */
+ public int getDepth(String labelId) {
+ LabelTreeNode node = LabelTreeNode.makeBasicNode(labelId);
+ return getDepth(node);
+ }
+
+ /**
+ * Returns all the ancestors of a particular node (labelID) in the tree
+ */
+ public List getAllParents(String labelId) {
+ List parentNodes = getAllParents(LabelTreeNode.makeBasicNode(labelId));
+
+ if (parentNodes == null)
+ return null;
+
+ List parents = new ArrayList<>(parentNodes.size());
+
+ for (LabelTreeNode p : parentNodes) {
+ parents.add(p.getLabelID());
+ }
+
+ return parents;
+ }
+
+ /**
+ * Traverses the Tree in a Breadth-First order and returns the labelIDs
+ */
+ public List getBreadthOrderedLabelList() {
+ List nodes = getBreadthOrderedNodeList();
+
+ if (nodes == null)
+ return null;
+
+ List labelIds = new ArrayList<>(nodes.size());
+
+ for (LabelTreeNode p : nodes) {
+ labelIds.add(p.getLabelID());
+ }
+
+ return labelIds;
+ }
+
+ /**
+ * Traverses the Tree in a (Pre-Order) Depth-First order and returns the labelIDs
+ */
+ public List getDepthOrderedLabelList() {
+ List nodes = getDepthOrderedNodeList();
+
+ if (nodes == null)
+ return null;
+
+ List labelIds = new ArrayList<>(nodes.size());
+
+ for (LabelTreeNode p : nodes) {
+ labelIds.add(p.getLabelID());
+ }
+
+ return labelIds;
+ }
+
+ /**
+ * Returns whether the provided labelID corresponds to a leaf in the Tree or not
+ */
+ public boolean isLeaf(String labelId) {
+ LabelTreeNode node = LabelTreeNode.makeBasicNode(labelId);
+ return isLeaf(node);
+ }
+
+ /**
+ * A utility function that appends the label descriptions of the child nodes
+ * to their parents' description
+ *
+ * Since nodes in a topic/label hierarchy usually follow IS-A property, this function can enrich
+ * the descriptions of the parent nodes
+ */
+ public void aggregateChildrenDescription() {
+ List nodeList = getBreadthOrderedNodeList();
+
+ Collections.reverse(nodeList);
+
+ for (LabelTreeNode node : nodeList) {
+ String childDesc = getLabelDescription(node.getLabelID());
+
+ if (!isRoot(node)) {
+ LabelTreeNode parent = getParent(node);
+ String parentDesc = getLabelDescription(parent.getLabelID());
+
+ String newLabelDesc = parentDesc.trim() + " " + childDesc.trim();
+
+ setLabelDescription(parent.getLabelID(), newLabelDesc);
+ }
+ }
+ }
+
+ /**
+ * A Utility function that just appends the labelName to the labelDescription.
+ */
+ public void appendLabelNameToDesc() {
+ for (LabelTreeNode node : getNodes()) {
+ String labelId = node.getLabelID();
+
+ String description = getLabelDescription(labelId) + " " +
+ getLabelName(labelId);
+
+ node.setLabelDescription(description);
+ }
+ }
+
+ /**
+ * A Utility function that just copies the labelNames to labelDescriptions.
+ *
+ * In scenarios, where users don't provide descriptions for their labels, this function can
+ * be used as a last resort for Dataless Classification
+ */
+ public void copyLabelNameToDesc() {
+ for (LabelTreeNode node : getNodes()) {
+ String labelId = node.getLabelID();
+ String labelName = getLabelName(labelId);
+ node.setLabelDescription(labelName);
+ }
+ }
+
+ /**
+ * Returns the labelIDs of all nodes at the same level as the provided node (labelID)
+ */
+ public Set getSameLevelLabels(String labelId) {
+ Set nodes = getSameLevelNodes(labelId);
+
+ if (nodes == null)
+ return null;
+
+ Set output = new HashSet<>(nodes.size());
+
+ for (LabelTreeNode p : nodes) {
+ output.add(p.getLabelID());
+ }
+
+ return output;
+ }
+
+ /**
+ * Returns all the nodes at the same level as the provided node (labelID)
+ */
+ public Set getSameLevelNodes(String labelId) {
+ int depth = getDepth(labelId);
+
+ if (depth == -1)
+ return null;
+
+ List nodes = getBreadthOrderedNodeList();
+
+ Set output = new HashSet<>();
+
+ for (LabelTreeNode node : nodes) {
+ int thisDepth = getDepth(node);
+
+ if (thisDepth > depth)
+ break;
+ else if (thisDepth == depth)
+ output.add(node);
+ }
+
+ return output;
+ }
+
+ /**
+ * A utility function which can be used to identify the top-level nodes in the tree,
+ * if such an information is not explicitly provided by the end-user.
+ *
+ * This function uses the parent-children map to identify the top-level nodes.
+ */
+ public static Set identifyTopNodes(Map> childMap) {
+ Set candidateTopNodes = new HashSet<>();
+ Set topNodes = new HashSet<>();
+
+ Set children = new HashSet<>();
+
+ for (String parent : childMap.keySet()) {
+ if (!children.contains(parent))
+ candidateTopNodes.add(parent);
+
+ children.addAll(childMap.get(parent));
+ }
+
+ for (String candidate : candidateTopNodes) {
+ if (!children.contains(candidate))
+ topNodes.add(candidate);
+ }
+
+ return topNodes;
+ }
+
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof LabelTree))
+            return false;
+
+        LabelTree that = (LabelTree) o;
+
+        return root_label.equals(that.root_label) && super.equals(that);
+    }
+
+    // hashCode must accompany equals: equal trees always share root_label, so equal objects hash alike
+    @Override
+    public int hashCode() {
+        return root_label.hashCode();
+    }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("");
+
+ List nodes = getBreadthOrderedNodeList();
+
+ for (LabelTreeNode node : nodes) {
+ if (!isLeaf(node)) {
+ for (LabelTreeNode child : getChildren(node)) {
+ sb.append(node.getLabelID()).append("\t").append(child.getLabelID()).append("\n");
+ }
+ }
+ }
+
+ return sb.toString();
+ }
+
+ /**
+ * This Utility function dumps a text representation of the tree to the disk.
+ */
+ public void dumpTreeLabelDesc(String outPath) {
+ try(BufferedWriter bw = new BufferedWriter(new FileWriter(new File(outPath)))) {
+ StringBuilder sb = new StringBuilder("");
+
+ List nodes = getBreadthOrderedNodeList();
+
+ for (LabelTreeNode node : nodes) {
+ if (isRoot(node))
+ continue;
+
+ sb.append(node.getLabelID()).append("\t").append(node.getLabelDescription()).append("\n");
+ }
+
+ bw.write(sb.toString());
+ } catch (IOException e) {
+ e.printStackTrace();
+ logger.error("IO Error while writing file");
+ throw new RuntimeException("IO Error while writing file");
+ }
+ }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/LabelTreeNode.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/LabelTreeNode.java
new file mode 100755
index 000000000..f8d6846ef
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/classifier/LabelTreeNode.java
@@ -0,0 +1,102 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.classifier;
+
+import edu.illinois.cs.cogcomp.datalessclassification.hierarchy.TreeNode;
+
+/**
+ * The Node Class used by {@link LabelTree} internally.
+ * Carries a labelName and a labelDescription in addition to the labelID inherited from
+ * {@link TreeNode}. Equality and hashing are based on the labelID only.
+ *
+ * @author shashank
+ */
+
+public class LabelTreeNode extends TreeNode {
+
+    private static final long serialVersionUID = 1L;
+    private String labelName;
+    private String labelDescription;
+
+    /**
+     * Convenience factory: builds a node with the given labelID and an empty name and
+     * description.
+     */
+    public static LabelTreeNode makeBasicNode(String labelID) {
+        return new LabelTreeNode(labelID, "", "");
+    }
+
+    /**
+     * Copy constructor: copies the labelID, labelName and labelDescription of {@code thatNode}.
+     */
+    public LabelTreeNode(LabelTreeNode thatNode) {
+        this(thatNode.getLabelID(), thatNode.getLabelName(), thatNode.getLabelDescription());
+    }
+
+    /**
+     * Builds a node from its three components.
+     *
+     * @param labelID   identifier handed to the {@link TreeNode} constructor
+     * @param labelName name stored through {@link #setLabelName(String)}
+     * @param labelDesc description stored through {@link #setLabelDescription(String)}
+     */
+    LabelTreeNode(String labelID, String labelName, String labelDesc) {
+        super(labelID);
+        setLabelName(labelName);
+        setLabelDescription(labelDesc);
+    }
+
+    /**
+     * @return the description currently stored on this node
+     */
+    String getLabelDescription() {
+        return labelDescription;
+    }
+
+    /**
+     * Replaces the description stored on this node.
+     */
+    void setLabelDescription(String description) {
+        labelDescription = description;
+    }
+
+    /**
+     * @return the name currently stored on this node
+     */
+    String getLabelName() {
+        return labelName;
+    }
+
+    /**
+     * Replaces the name stored on this node.
+     */
+    void setLabelName(String labelName) {
+        this.labelName = labelName;
+    }
+
+    /**
+     * Nodes are equal when the other object is a LabelTreeNode with an equal labelID; name and
+     * description are not compared.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (o instanceof LabelTreeNode) {
+            LabelTreeNode that = (LabelTreeNode) o;
+            return this.labelID.equals(that.getLabelID());
+        }
+
+        return false;
+    }
+
+    /**
+     * Hash of the labelID, matching {@link #equals(Object)}.
+     */
+    @Override
+    public int hashCode() {
+        return labelID.hashCode();
+    }
+
+    /**
+     * @return labelID, labelName and labelDescription joined by tab characters
+     */
+    @Override
+    public String toString() {
+        return labelID + "\t" + labelName + "\t" + labelDescription;
+    }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/DatalessConfigurator.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/DatalessConfigurator.java
new file mode 100644
index 000000000..50a882508
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/DatalessConfigurator.java
@@ -0,0 +1,34 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.config;
+
+import edu.illinois.cs.cogcomp.core.utilities.configuration.Configurator;
+import edu.illinois.cs.cogcomp.core.utilities.configuration.Property;
+
+/**
+ * The basic Configurator used by various variants of the Dataless Annotator.
+ * Declares the property keys and default values shared by the concrete configurators; the
+ * class is abstract, so the default configuration itself is assembled by subclasses.
+ *
+ * @author shashank
+ */
+public abstract class DatalessConfigurator extends Configurator {
+ // Key "inferenceBottomUp", default "True".
+ // NOTE(review): presumably selects bottom-up inference over the hierarchy -- confirm in the annotator.
+ public static final Property BottomUp_Inference = new Property("inferenceBottomUp", "True");
+
+ // Key "jsonHierarchyPath", default is the empty string.
+ public static final Property JSON_Hierarchy_Path = new Property("jsonHierarchyPath", "");
+
+ // The three path properties below default to files under hierarchies/20newsgroups/.
+ public static final Property LabelHierarchy_Path = new Property("labelHierarchyPath",
+ "hierarchies/20newsgroups/parentChildIdMap.txt");
+ public static final Property LabelName_Path = new Property("labelNamePath",
+ "hierarchies/20newsgroups/idToLabelNameMap.txt");
+ public static final Property LabelDesc_Path = new Property("labelDescPath",
+ "hierarchies/20newsgroups/labelDesc_Kws_embellished.txt");
+
+ // Classifier settings; the defaults are stored as strings ("1", "0.99", "1", "3").
+ public static final Property topK = new Property("topK", "1");
+ public static final Property classifierThreshold = new Property("classifierThreshold", "0.99");
+ public static final Property classifierLeastK = new Property("classifierLeastK", "1");
+ public static final Property classifierMaxK = new Property("classifierMaxK", "3");
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/ESADatalessConfigurator.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/ESADatalessConfigurator.java
new file mode 100644
index 000000000..9b45ecee3
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/ESADatalessConfigurator.java
@@ -0,0 +1,37 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.config;
+
+import edu.illinois.cs.cogcomp.core.utilities.configuration.Property;
+import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
+import edu.illinois.cs.cogcomp.datalessclassification.ta.ESADatalessAnnotator;
+
+/**
+ * The Configurator used by {@link ESADatalessAnnotator}. Adds the {@code esaDimension}
+ * property to the shared {@link DatalessConfigurator} properties.
+ *
+ * @author shashank
+ */
+public class ESADatalessConfigurator extends DatalessConfigurator {
+
+    public static final Property ESA_DIM = new Property("esaDimension", "100");
+
+    /**
+     * Builds a ResourceManager holding the default key/value pairs of this configurator.
+     *
+     * @return a non-null ResourceManager with appropriate values set.
+     */
+    @Override
+    public ResourceManager getDefaultConfig() {
+        Property[] defaults = new Property[] {
+                ESA_DIM,
+                BottomUp_Inference,
+                JSON_Hierarchy_Path,
+                LabelHierarchy_Path,
+                LabelName_Path,
+                LabelDesc_Path,
+                topK,
+                classifierThreshold,
+                classifierLeastK,
+                classifierMaxK
+        };
+
+        return new ResourceManager(generateProperties(defaults));
+    }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/W2VDatalessConfigurator.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/W2VDatalessConfigurator.java
new file mode 100644
index 000000000..6995183e9
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/config/W2VDatalessConfigurator.java
@@ -0,0 +1,37 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.config;
+
+import edu.illinois.cs.cogcomp.core.utilities.configuration.Property;
+import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
+import edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator;
+
+/**
+ * The Configurator used by {@link W2VDatalessAnnotator}. Adds the {@code w2vDimension}
+ * property to the shared {@link DatalessConfigurator} properties.
+ *
+ * @author shashank
+ */
+public class W2VDatalessConfigurator extends DatalessConfigurator {
+
+    public static final Property W2V_DIM = new Property("w2vDimension", "100");
+
+    /**
+     * Builds a ResourceManager holding the default key/value pairs of this configurator.
+     *
+     * @return a non-null ResourceManager with appropriate values set.
+     */
+    @Override
+    public ResourceManager getDefaultConfig() {
+        Property[] defaults = new Property[] {
+                W2V_DIM,
+                BottomUp_Inference,
+                JSON_Hierarchy_Path,
+                LabelHierarchy_Path,
+                LabelName_Path,
+                LabelDesc_Path,
+                topK,
+                classifierThreshold,
+                classifierLeastK,
+                classifierMaxK
+        };
+
+        return new ResourceManager(generateProperties(defaults));
+    }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/SimpleTree.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/SimpleTree.java
new file mode 100644
index 000000000..cbfbe12a0
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/SimpleTree.java
@@ -0,0 +1,322 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.hierarchy;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * A Basic Tree implementation that satisfies project specific needs by wrapping around
+ * {@link UnorderedTree}. Edges of the wrapped tree are keyed by an integer counter that is
+ * advanced on every successful {@link #addEdge(Serializable, Serializable)}.
+ *
+ * @param <N> the node type
+ * @author shashank
+ */
+
+public class SimpleTree<N extends Serializable> implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    protected UnorderedTree<N, Integer> _tree;
+
+    protected int addedEdgeCnt = 0;
+
+    public SimpleTree() {
+        _tree = new UnorderedTree<>();
+    }
+
+    /**
+     * @param node the node whose number of children is to be returned
+     * @return the number of children that the {@code node} has
+     */
+    public int getChildCount(N node) {
+        return _tree.getChildCount(node);
+    }
+
+    /**
+     * Returns a set of node's children. If the node has no children then an empty set will be
+     * returned.
+     */
+    public Set<N> getChildren(N node) {
+        return _tree.getChildren(node);
+    }
+
+    /**
+     * @return the depth of the node in this tree, or -1 if the node is not present in this tree
+     */
+    public int getDepth(N node) {
+        return _tree.getDepth(node);
+    }
+
+    /**
+     * Returns the height of the tree, or -1 if the tree is empty.
+     */
+    public int getHeight() {
+        return _tree.getHeight();
+    }
+
+    /**
+     * @return the parent of {@code node} as reported by the wrapped tree
+     */
+    public N getParent(N node) {
+        return _tree.getParent(node);
+    }
+
+    /**
+     * @return the root of the wrapped tree
+     */
+    public N getRoot() {
+        return _tree.getRoot();
+    }
+
+    /**
+     * Returns the instance stored in the tree that is equal to {@code node}, or {@code null} if
+     * the tree does not contain such a node.
+     */
+    public N getNode(N node) {
+        if (!containsNode(node))
+            return null;
+
+        if (isRoot(node))
+            return _tree.getRoot();
+
+        // A contained non-root node is the second endpoint of its parent edge.
+        int edge = _tree.getParentEdge(node);
+
+        return _tree.getEndpoints(edge).getSecond();
+    }
+
+    /**
+     * Adds the specified node ({@code child}) as a child of the parent node ({@code parent}).
+     *
+     * @param parent node (must exist prior to addition)
+     * @param child node
+     * @return {@code true} if the graph has been modified
+     */
+    public boolean addEdge(N parent, N child) {
+        boolean success = _tree.addEdge(addedEdgeCnt + 1, parent, child);
+
+        if (success)
+            addedEdgeCnt++;
+
+        return success;
+    }
+
+    /**
+     * Initializes the tree with the given node. Can only be invoked once i.e. once the root is
+     * set, invoking this will throw an exception.
+     *
+     * @param node to be used as the root
+     * @return the result of adding {@code node} as a vertex of the wrapped tree
+     */
+    public boolean initializeRoot(N node) {
+        return _tree.addVertex(node);
+    }
+
+    /**
+     * Returns {@code true} if {@code node} is a leaf of this tree, i.e., if it has no children.
+     *
+     * @param node the node to be queried
+     */
+    public boolean isLeaf(N node) {
+        return _tree.isLeaf(node);
+    }
+
+    /**
+     * Returns true iff {@code v1} is the parent of {@code v2}. Note that if {@code v2} is the
+     * root and {@code v1} is {@code null}, this method still returns {@code true}.
+     */
+    public boolean isParent(N v1, N v2) {
+        // The root has no parent; answer here rather than letting the lookup dereference null.
+        if (isRoot(v2))
+            return v1 == null;
+
+        return _tree.isPredecessor(v1, v2);
+    }
+
+    /**
+     * Returns {@code true} if the given {@code node} is the root of this tree
+     *
+     * @param node the node to be queried
+     */
+    public boolean isRoot(N node) {
+        return _tree.isRoot(node);
+    }
+
+    /**
+     * Returns true iff {@code v1} is the child of {@code v2}. Note that if {@code v2} is a leaf
+     * node and {@code v1} is {@code null}, this method returns {@code true}.
+     */
+    public boolean isChild(N v1, N v2) {
+        // The root is nobody's child; answer here rather than dereferencing its null parent.
+        if (isRoot(v1))
+            return false;
+
+        return _tree.isSuccessor(v1, v2);
+    }
+
+    public boolean containsNode(N node) {
+        return _tree.containsVertex(node);
+    }
+
+    /**
+     * Returns true iff both nodes are in the tree and one of them is the parent of the other.
+     */
+    public boolean containsEdge(N v1, N v2) {
+        if (!containsNode(v1) || !containsNode(v2))
+            return false;
+
+        // getParent returns null for the root, so neither comparison can throw.
+        return v2.equals(getParent(v1)) || v1.equals(getParent(v2));
+    }
+
+    public int getNodeCount() {
+        return _tree.getVertexCount();
+    }
+
+    public Set<N> getNodes() {
+        return _tree.getVertices();
+    }
+
+    /**
+     * @return the nodes in breadth-first order starting at the root; empty if the tree has no
+     *         root
+     */
+    public List<N> getBreadthOrderedNodeList() {
+        List<N> output = new ArrayList<>(_tree.getVertexCount());
+        N root = _tree.getRoot();
+
+        if (root == null)
+            return output;
+
+        output.add(root);
+
+        // The output list doubles as the queue: "next" is the head, children join the tail.
+        for (int next = 0; next < output.size(); next++) {
+            N node = output.get(next);
+
+            if (!isLeaf(node))
+                output.addAll(getChildren(node));
+        }
+
+        return output;
+    }
+
+    /**
+     * @return the nodes in depth-first (pre-order) order starting at the root; empty if the tree
+     *         has no root
+     */
+    public List<N> getDepthOrderedNodeList() {
+        List<N> output = new ArrayList<>(_tree.getVertexCount());
+        N root = _tree.getRoot();
+
+        if (root == null)
+            return output;
+
+        // The end of the list is the top of the stack, so push and pop are constant time.
+        List<N> stack = new ArrayList<>();
+        stack.add(root);
+
+        while (!stack.isEmpty()) {
+            N node = stack.remove(stack.size() - 1);
+            output.add(node);
+
+            if (!isLeaf(node))
+                stack.addAll(getChildren(node));
+        }
+
+        return output;
+    }
+
+    /**
+     * @return the node together with its siblings; just the node itself for the root; null if
+     *         the node is not in the tree
+     */
+    public Set<N> getSiblingsInclusive(N node) {
+        if (!_tree.containsVertex(node))
+            return null;
+
+        if (isRoot(node))
+            return Collections.singleton(node);
+
+        Set<N> siblings = new HashSet<>(getSiblingsExclusive(node));
+        siblings.add(node);
+
+        return new ImmutableSet.Builder<N>().addAll(siblings).build();
+    }
+
+    /**
+     * @return the other children of the node's parent; null if the node is the root or is not in
+     *         the tree
+     */
+    public Set<N> getSiblingsExclusive(N node) {
+        if (!_tree.containsVertex(node))
+            return null;
+
+        if (isRoot(node))
+            return null;
+
+        N parent = getParent(node);
+
+        Set<N> siblings = new HashSet<>(getChildren(parent));
+        siblings.remove(node);
+
+        return new ImmutableSet.Builder<N>().addAll(siblings).build();
+    }
+
+    /**
+     * @return the ancestors of the node, nearest first and ending with the root; null if the
+     *         node is the root or is not in the tree
+     */
+    public List<N> getAllParents(N node) {
+        if (!_tree.containsVertex(node))
+            return null;
+
+        if (isRoot(node))
+            return null;
+
+        List<N> parents = new ArrayList<>();
+
+        N child = node;
+        N parent;
+
+        while ((parent = getParent(child)) != null) {
+            parents.add(parent);
+            child = parent;
+        }
+
+        return new ImmutableList.Builder<N>().addAll(parents).build();
+    }
+
+    /**
+     * @return every node of the tree that has no children
+     */
+    public Set<N> getLeafSet() {
+        Set<N> leafSet = new HashSet<>();
+
+        for (N node : getNodes()) {
+            if (isLeaf(node))
+                leafSet.add(node);
+        }
+
+        return new ImmutableSet.Builder<N>().addAll(leafSet).build();
+    }
+
+    /**
+     * @return all nodes having the same depth as {@code node} (including it); null if the node
+     *         is not in the tree
+     */
+    public Set<N> getNodesAtSameLevel(N node) {
+        if (!containsNode(node))
+            return null;
+
+        int depth = getDepth(node);
+
+        return getAllNodesAtDepth(depth);
+    }
+
+    /**
+     * Collects the nodes whose depth equals {@code depth}; null if the tree is not that deep.
+     */
+    private Set<N> getAllNodesAtDepth(int depth) {
+        if (getHeight() < depth)
+            return null;
+
+        // No shortcut through getLeafSet() for the deepest level: in an unbalanced tree some
+        // leaves sit at shallower depths and do not belong to this level.
+        Set<N> nodeSet = new HashSet<>();
+
+        for (N node : getNodes()) {
+            if (getDepth(node) == depth)
+                nodeSet.add(node);
+        }
+
+        return new ImmutableSet.Builder<N>().addAll(nodeSet).build();
+    }
+
+    /**
+     * Trees are equal when they have added the same number of edges and their wrapped trees are
+     * equal.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof SimpleTree<?>))
+            return false;
+
+        SimpleTree<?> that = (SimpleTree<?>) o;
+
+        if (this.addedEdgeCnt != that.addedEdgeCnt)
+            return false;
+
+        return this._tree.equals(that._tree);
+    }
+
+    /**
+     * Derived only from the edge counter, which {@link #equals(Object)} requires to match, so
+     * equal trees always share a hash code.
+     */
+    @Override
+    public int hashCode() {
+        return addedEdgeCnt;
+    }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/TreeNode.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/TreeNode.java
new file mode 100755
index 000000000..b5ad52ff5
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/TreeNode.java
@@ -0,0 +1,79 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.hierarchy;
+
+import java.io.Serializable;
+
+/**
+ * The most basic Node class; contains just the ID of the label. Equality and hashing are based
+ * on that ID.
+ *
+ * @author yqsong@illinois.edu
+ * @author shashank
+ */
+
+public class TreeNode implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    protected String labelID;
+
+    /**
+     * A convenience factory function to create a basic TreeNode
+     */
+    public static TreeNode makeBasicNode(String labelID) {
+        return new TreeNode(labelID);
+    }
+
+    /**
+     * Copy Constructor
+     */
+    public TreeNode(TreeNode thatNode) {
+        this(thatNode.getLabelID());
+    }
+
+    /**
+     * Initializes the TreeNode with the provided labelID
+     */
+    public TreeNode(String labelID) {
+        this.labelID = labelID;
+    }
+
+    /**
+     * Gets the LabelID for the node
+     */
+    public String getLabelID() {
+        return this.labelID;
+    }
+
+    /**
+     * Sets the LabelID for the node. The ID feeds {@link #hashCode()}, so changing it while the
+     * node is stored in a hash-based collection makes the node unreachable there.
+     */
+    public void setLabelID(String labelID) {
+        this.labelID = labelID;
+    }
+
+    @Override
+    public String toString() {
+        return labelID;
+    }
+
+    /**
+     * Nodes are equal when the other object is a TreeNode with an equal labelID; two nodes
+     * without an ID (null) are equal to each other.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof TreeNode))
+            return false;
+
+        TreeNode other = (TreeNode) o;
+
+        // Null-safe comparison: a node created without an ID must not throw here.
+        if (this.labelID == null)
+            return other.labelID == null;
+
+        return this.labelID.equals(other.labelID);
+    }
+
+    /**
+     * Hash of the labelID, or 0 when the node has no ID.
+     */
+    @Override
+    public int hashCode() {
+        return labelID == null ? 0 : labelID.hashCode();
+    }
+}
diff --git a/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/UnorderedTree.java b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/UnorderedTree.java
new file mode 100644
index 000000000..2816bf32a
--- /dev/null
+++ b/dataless-classifier/src/main/java/edu/illinois/cs/cogcomp/datalessclassification/hierarchy/UnorderedTree.java
@@ -0,0 +1,696 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computation Group University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+package edu.illinois.cs.cogcomp.datalessclassification.hierarchy;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
+
+import com.google.common.collect.ImmutableSet;
+
+import edu.uci.ics.jung.graph.AbstractTypedGraph;
+import edu.uci.ics.jung.graph.Tree;
+import edu.uci.ics.jung.graph.util.EdgeType;
+import edu.uci.ics.jung.graph.util.Pair;
+
+/**
+ * A Generic Tree Implementation, largely based on Jung's OrderedKAryTree (edu.uci.ics.jung.graph)
+ * -- adapted largely to discount the order of nodes, and to support arbitrary number of children --
+ * allows some data to be associated with Edges, which was not immediately required but is kind of a
+ * forward planning for the project
+ *
+ * @author shashank
+ */
+
+public class UnorderedTree extends
+ AbstractTypedGraph implements Tree {
+
+ private static final long serialVersionUID = 1L;
+
+ protected Map> edge_vpairs;
+ protected Map vertex_data;
+ protected int height;
+ protected V root;
+
+ /**
+  * Creates an empty directed tree: both bookkeeping maps are empty and the height is -1.
+  */
+ public UnorderedTree() {
+     super(EdgeType.DIRECTED);
+     this.vertex_data = new HashMap<>();
+     this.edge_vpairs = new HashMap<>();
+     this.height = -1;
+ }
+
+ /**
+  * @param vertex the vertex whose number of children is to be returned
+  * @return the number of children that the {@code vertex} has; 0 if the vertex is not in the
+  *         tree
+  */
+ @Override
+ public int getChildCount(V vertex) {
+     if (!containsVertex(vertex))
+         return 0;
+
+     // child_edges stays null until the first child is attached
+     Set<E> edges = vertex_data.get(vertex).child_edges;
+
+     return edges == null ? 0 : edges.size();
+ }
+
+ @Override
+ public Set getChildEdges(V vertex) {
+ if (!containsVertex(vertex))
+ return null;
+
+ Set edges = vertex_data.get(vertex).child_edges;
+
+ if (edges == null)
+ return Collections.emptySet();
+ else
+ return new ImmutableSet.Builder().addAll(edges).build();
+ }
+
+ /**
+  * Returns a set of vertex's child vertices. If the vertex has no children then an empty set
+  * will be returned; null is returned if the vertex is not in the tree. The result is an
+  * immutable copy.
+  */
+ @Override
+ public Set<V> getChildren(V vertex) {
+     if (!containsVertex(vertex))
+         return null;
+
+     Set<E> edges = vertex_data.get(vertex).child_edges;
+
+     if (edges == null)
+         return Collections.emptySet();
+
+     Set<V> children = new HashSet<>(edges.size());
+
+     // the child is the endpoint of each child edge that is not the vertex itself
+     for (E edge : edges)
+         children.add(this.getOpposite(vertex, edge));
+
+     return new ImmutableSet.Builder<V>().addAll(children).build();
+ }
+
+ /**
+  * @return the depth recorded for the vertex, or -1 if the vertex is not present in this tree
+  */
+ @Override
+ public int getDepth(V vertex) {
+     return containsVertex(vertex) ? vertex_data.get(vertex).depth : -1;
+ }
+
+ /**
+  * @return the height of the tree, or -1 if the tree is empty
+  */
+ @Override
+ public int getHeight() {
+     return this.height;
+ }
+
+ @Override
+ public V getParent(V vertex) {
+ if (!containsVertex(vertex))
+ return null;
+ else if (vertex.equals(root))
+ return null;
+
+ return edge_vpairs.get(vertex_data.get(vertex).parent_edge).getFirst();
+ }
+
+ /**
+  * @return the parent edge recorded for the vertex; null if the vertex is not in the tree
+  */
+ @Override
+ public E getParentEdge(V vertex) {
+     return containsVertex(vertex) ? vertex_data.get(vertex).parent_edge : null;
+ }
+
+ /**
+  * @return the current value of the root field
+  */
+ @Override
+ public V getRoot() {
+     return this.root;
+ }
+
+ @Override
+ public Collection> getTrees() {
+ Collection> forest = new ArrayList<>(1);
+ forest.add(this);
+
+ return forest;
+ }
+
+ /**
+ * Adds the specified {@code child} vertex and edge {@code e} to the graph with the specified
+ * parent vertex {@code parent}.
+ *
+ * @param e the edge to add
+ * @param parent the source of the edge to be added
+ * @param child the destination of the edge to be added
+ * @return {@code true} if the graph has been modified
+ */
+ @Override
+ public boolean addEdge(E e, V parent, V child) {
+ if (e == null || child == null || parent == null)
+ throw new IllegalArgumentException("Inputs must not be null");
+
+ if (!containsVertex(parent))
+ throw new IllegalArgumentException("Tree must already include parent: " + parent);
+
+ if (containsVertex(child))
+ throw new IllegalArgumentException("Tree must not already include child: " + child);
+
+ if (parent.equals(child))
+ throw new IllegalArgumentException("Input vertices must be distinct");
+
+ Pair endpoints = new Pair<>(parent, child);
+
+ if (containsEdge(e)) {
+ if (!endpoints.equals(edge_vpairs.get(e)))
+ throw new IllegalArgumentException("Tree already includes edge" + e
+ + " with different endpoints " + edge_vpairs.get(e));
+ else
+ return false;
+ }
+
+ VertexData parent_data = vertex_data.get(parent);
+ Set outedges = parent_data.child_edges;
+
+ if (outedges == null) {
+ parent_data.child_edges = new HashSet<>();
+ outedges = parent_data.child_edges;
+ }
+
+ outedges.add(e);
+
+ // initialize VertexData for child; leave child's child_edges null for now
+ VertexData child_data = new VertexData(e, parent_data.depth + 1);
+ vertex_data.put(child, child_data);
+
+ height = child_data.depth > height ? child_data.depth : height;
+ edge_vpairs.put(e, endpoints);
+
+ return true;
+ }
+
+ @Override
+ public boolean addEdge(E e, V v1, V v2, EdgeType edge_type) {
+ this.validateEdgeType(edge_type);
+
+ return addEdge(e, v1, v2);
+ }
+
+ /**
+  * Adds {@code edge} between the two endpoints of the pair; the first endpoint is the parent
+  * and the second one the child.
+  *
+  * @throws IllegalArgumentException if {@code edge} or {@code endpoints} is null
+  */
+ @Override
+ public boolean addEdge(E edge, Pair<? extends V> endpoints, EdgeType edgeType) {
+     if (edge == null || endpoints == null)
+         throw new IllegalArgumentException("inputs must not be null");
+
+     return addEdge(edge, endpoints.getFirst(), endpoints.getSecond(), edgeType);
+ }
+
+ /**
+  * Adds {@code edge} between the two vertices of the collection; the first one is the parent
+  * and the second one the child.
+  *
+  * @throws IllegalArgumentException if an argument is null, the collection does not hold
+  *         exactly two vertices, or the two vertices are equal
+  */
+ @Override
+ @SuppressWarnings("unchecked")
+ public boolean addEdge(E edge, Collection<? extends V> vertices, EdgeType edge_type) {
+     if (edge == null || vertices == null)
+         throw new IllegalArgumentException("inputs must not be null");
+
+     if (vertices.size() != 2)
+         throw new IllegalArgumentException("'vertices' must contain "
+                 + "exactly 2 distinct vertices");
+
+     this.validateEdgeType(edge_type);
+
+     Pair<V> endpoints;
+
+     // reuse the collection when it already is a Pair, otherwise copy its two elements
+     if (vertices instanceof Pair)
+         endpoints = (Pair<V>) vertices;
+     else
+         endpoints = new Pair<>(vertices);
+
+     V v1 = endpoints.getFirst();
+     V v2 = endpoints.getSecond();
+
+     if (v1.equals(v2))
+         throw new IllegalArgumentException("Input vertices must be distinct");
+
+     return addEdge(edge, v1, v2);
+ }
+
+ @Override
+ public boolean addVertex(V vertex) throws UnsupportedOperationException {
+ if (root == null) {
+ this.root = vertex;
+ vertex_data.put(vertex, new VertexData(null, 0));
+ this.height = 0;
+ return true;
+ }
+
+ else {
+ throw new UnsupportedOperationException("Unless you are setting "
+ + "the root, use addEdge() or addChild()");
+ }
+ }
+
+ /**
+  * @return the second (child) endpoint of the edge; null if the edge is not in the tree
+  */
+ @Override
+ public V getDest(E directed_edge) {
+     return containsEdge(directed_edge) ? edge_vpairs.get(directed_edge).getSecond() : null;
+ }
+
+ /**
+  * @return the (parent, child) pair stored for the edge; null if the edge is not in the tree
+  */
+ @Override
+ public Pair<V> getEndpoints(E edge) {
+     if (!containsEdge(edge))
+         return null;
+
+     return edge_vpairs.get(edge);
+ }
+
+ @Override
+ public Set getInEdges(V vertex) {
+ if (!containsVertex(vertex))
+ return null;
+ else if (vertex.equals(root))
+ return Collections.emptySet();
+ else
+ return Collections.singleton(getParentEdge(vertex));
+ }
+
+ /**
+  * @return the second endpoint of the edge when {@code vertex} equals the first one, otherwise
+  *         the first endpoint; null if the vertex or the edge is not in the tree
+  */
+ @Override
+ public V getOpposite(V vertex, E edge) {
+     if (!containsVertex(vertex) || !containsEdge(edge))
+         return null;
+
+     Pair<V> endpoints = edge_vpairs.get(edge);
+     V v1 = endpoints.getFirst();
+     V v2 = endpoints.getSecond();
+
+     return v1.equals(vertex) ? v2 : v1;
+ }
+
+ /**
+  * The outgoing edges of a vertex are its child edges; see {@link #getChildEdges(Object)}.
+  */
+ @Override
+ public Set<E> getOutEdges(V vertex) {
+     return getChildEdges(vertex);
+ }
+
+ /**
+  * @return 0 if {@code vertex} is the root, -1 if the vertex is not an element of this tree,
+  *         and 1 otherwise
+  */
+ @Override
+ public int getPredecessorCount(V vertex) {
+     if (!containsVertex(vertex))
+         return -1;
+
+     if (vertex.equals(root))
+         return 0;
+
+     return 1;
+ }
+
+ /**
+  * @return Empty Set if the {@code vertex} is the root, null if the vertex is not an element
+  *         of this tree, and the parent wrapped in a set otherwise
+  */
+ @Override
+ public Set<V> getPredecessors(V vertex) {
+     if (!containsVertex(vertex))
+         return null;
+
+     if (vertex.equals(root))
+         return Collections.emptySet();
+
+     return Collections.singleton(getParent(vertex));
+ }
+
+ /**
+  * @return the first (parent) endpoint of the edge; null if the edge is not in the tree
+  */
+ @Override
+ public V getSource(E directed_edge) {
+     return containsEdge(directed_edge) ? edge_vpairs.get(directed_edge).getFirst() : null;
+ }
+
+ @Override
+ public int getSuccessorCount(V vertex) {
+ return getChildCount(vertex);
+ }
+
+ /**
+  * The successors of a vertex are its children; see {@link #getChildren(Object)}.
+  */
+ @Override
+ public Set<V> getSuccessors(V vertex) {
+     return getChildren(vertex);
+ }
+
+ @Override
+ public int inDegree(V vertex) {
+ if (!containsVertex(vertex))
+ return 0;
+
+ if (vertex.equals(root))
+ return 0;
+
+ return 1;
+ }
+
+ /**
+  * @return true iff both the edge and the vertex are in the tree and the vertex is the second
+  *         (child) endpoint of the edge
+  */
+ @Override
+ public boolean isDest(V vertex, E edge) {
+     return containsEdge(edge) && containsVertex(vertex)
+             && edge_vpairs.get(edge).getSecond().equals(vertex);
+ }
+
+ /**
+  * Returns {@code true} if {@code vertex} is a leaf of this tree, i.e., if it is in the tree
+  * and has no children.
+  *
+  * @param vertex the vertex to be queried
+  */
+ public boolean isLeaf(V vertex) {
+     return containsVertex(vertex) && outDegree(vertex) == 0;
+ }
+
+ /**
+ * Returns true iff v1
is the parent of v2
. Note that if
+ * v2
is the root and v1
is null
, this method returns
+ * true
.
+ */
+ @Override
+ public boolean isPredecessor(V v1, V v2) {
+ if (!containsVertex(v2))
+ return false;
+
+ return getParent(v2).equals(v1);
+ }
+
+ /**
+  * Returns {@code true} if the {@code vertex} is the root of this tree; always false while the
+  * tree has no root.
+  *
+  * @param vertex the vertex to be queried
+  */
+ public boolean isRoot(V vertex) {
+     return root != null && root.equals(vertex);
+ }
+
+ /**
+  * @return true iff both the edge and the vertex are in the tree and the vertex is the first
+  *         (parent) endpoint of the edge
+  */
+ @Override
+ public boolean isSource(V vertex, E edge) {
+     return containsEdge(edge) && containsVertex(vertex)
+             && edge_vpairs.get(edge).getFirst().equals(vertex);
+ }
+
+ /**
+ * Returns true iff v1
is the child of v2
. Note that if
+ * v2
is a leaf node and v1
is null
, this method returns
+ * true
.
+ */
+ @Override
+ public boolean isSuccessor(V v1, V v2) {
+ if (!containsVertex(v2))
+ return false;
+
+ if (containsVertex(v1))
+ return getParent(v1).equals(v2);
+
+ return isLeaf(v2) && v1 == null;
+ }
+
+ /**
+  * @return the number of child edges recorded for the vertex; 0 if it has none or is not in the
+  *         tree
+  */
+ @Override
+ public int outDegree(V vertex) {
+     if (!containsVertex(vertex))
+         return 0;
+
+     Set<E> out_edges = vertex_data.get(vertex).child_edges;
+
+     return out_edges == null ? 0 : out_edges.size();
+ }
+
+ /**
+  * @return true iff both the vertex and the edge are in the tree and the vertex is one of the
+  *         edge's endpoints
+  */
+ @Override
+ public boolean isIncident(V vertex, E edge) {
+     return containsVertex(vertex) && containsEdge(edge)
+             && edge_vpairs.get(edge).contains(vertex);
+ }
+
+ /**
+  * @return true iff both vertices are in the tree and {@code v2} is among the neighbors of
+  *         {@code v1}
+  */
+ @Override
+ public boolean isNeighbor(V v1, V v2) {
+     return containsVertex(v1) && containsVertex(v2) && getNeighbors(v1).contains(v2);
+ }
+
+ /**
+  * @return true iff endpoints are recorded for the edge
+  */
+ @Override
+ public boolean containsEdge(E edge) {
+     return this.edge_vpairs.containsKey(edge);
+ }
+
+ /**
+  * @return true iff bookkeeping data is recorded for the vertex
+  */
+ @Override
+ public boolean containsVertex(V vertex) {
+     return this.vertex_data.containsKey(vertex);
+ }
+
+ /**
+  * Finds the edge connecting the two vertices, in either direction.
+  *
+  * @return the parent edge of {@code v1} if its parent is {@code v2}, else the child edge of
+  *         {@code v1} that leads to {@code v2}; null if there is no such edge or either vertex
+  *         is not in the tree
+  */
+ @Override
+ public E findEdge(V v1, V v2) {
+     if (!containsVertex(v1) || !containsVertex(v2))
+         return null;
+
+     VertexData v1_data = vertex_data.get(v1);
+     E parentEdge = v1_data.parent_edge;
+
+     // the root has no parent edge; looking up a null edge would yield null endpoints
+     if (parentEdge != null && edge_vpairs.get(parentEdge).getFirst().equals(v2))
+         return parentEdge;
+
+     Set<E> edges = v1_data.child_edges;
+
+     if (edges == null)
+         return null;
+
+     for (E edge : edges)
+         if (edge_vpairs.get(edge).getSecond().equals(v2))
+             return edge;
+
+     return null;
+ }
+
+ @Override
+ public Set findEdgeSet(V v1, V v2) {
+ E edge = findEdge(v1, v2);
+
+ if (edge == null)
+ return Collections.emptySet();
+ else
+ return Collections.singleton(edge);
+ }
+
+ /**
+  * @return the number of edges for which endpoints are recorded
+  */
+ @Override
+ public int getEdgeCount() {
+     return this.edge_vpairs.size();
+ }
+
+ /**
+  * @return an immutable copy of the set of edges
+  */
+ @Override
+ public Set<E> getEdges() {
+     return new ImmutableSet.Builder<E>().addAll(edge_vpairs.keySet()).build();
+ }
+
+ /**
+  * @return always 2, regardless of the edge passed in
+  */
+ @Override
+ public int getIncidentCount(E edge) {
+     // all tree edges have 2 incident vertices
+     final int endpointsPerEdge = 2;
+
+     return endpointsPerEdge;
+ }
+
+ /**
+  * @return an immutable copy of the vertex's parent edge (if any) plus its child edges; an
+  *         empty set if it has neither; null if the vertex is not in the tree
+  */
+ public Set<E> getIncidentEdges(V vertex) {
+     if (!containsVertex(vertex))
+         return null;
+
+     Set<E> edges = new HashSet<>();
+     VertexData v_data = vertex_data.get(vertex);
+
+     if (v_data.parent_edge != null)
+         edges.add(v_data.parent_edge);
+
+     if (v_data.child_edges != null)
+         edges.addAll(v_data.child_edges);
+
+     if (edges.isEmpty())
+         return Collections.emptySet();
+
+     return new ImmutableSet.Builder<E>().addAll(edges).build();
+ }
+
+ /**
+  * @return the endpoint pair stored for the edge, or null if none is recorded
+  */
+ @Override
+ public Collection<V> getIncidentVertices(E edge) {
+     return edge_vpairs.get(edge);
+ }
+
+ @Override
+ public int getNeighborCount(V vertex) {
+ if (!containsVertex(vertex))
+ return 0;
+
+ return (vertex.equals(root) ? 0 : 1) + this.getChildCount(vertex);
+ }
+
+ /**
+  * @return an immutable copy of the vertex's parent (if any) plus its children; an empty set
+  *         if it has neither; null if the vertex is not in the tree
+  */
+ @Override
+ public Set<V> getNeighbors(V vertex) {
+     if (!containsVertex(vertex))
+         return null;
+
+     Set<V> vertices = new HashSet<>();
+     VertexData v_data = vertex_data.get(vertex);
+
+     // parent: first endpoint of the parent edge
+     if (v_data.parent_edge != null)
+         vertices.add(edge_vpairs.get(v_data.parent_edge).getFirst());
+
+     // children: second endpoint of every child edge
+     if (v_data.child_edges != null) {
+         for (E edge : v_data.child_edges)
+             vertices.add(edge_vpairs.get(edge).getSecond());
+     }
+
+     if (vertices.isEmpty())
+         return Collections.emptySet();
+
+     return new ImmutableSet.Builder<V>().addAll(vertices).build();
+ }
+
+ /**
+  * @return the number of vertices for which bookkeeping data is recorded
+  */
+ @Override
+ public int getVertexCount() {
+     return this.vertex_data.size();
+ }
+
+ /**
+  * @return an immutable copy of the set of vertices
+  */
+ @Override
+ public Set<V> getVertices() {
+     return new ImmutableSet.Builder<V>().addAll(vertex_data.keySet()).build();
+ }
+
+ @Override
+ public boolean removeEdge(E edge) {
+ if (!containsEdge(edge))
+ return false;
+
+ removeVertex(edge_vpairs.get(edge).getSecond());
+ edge_vpairs.remove(edge);
+
+ return true;
+ }
+
+ @Override
+ public boolean removeVertex(V vertex) {
+ if (!containsVertex(vertex))
+ return false;
+
+ // recursively remove all of vertex's children
+ for (V v : getChildren(vertex))
+ removeVertex(v);
+
+ E parent_edge = getParentEdge(vertex);
+ edge_vpairs.remove(parent_edge);
+
+ Set edges = vertex_data.get(vertex).child_edges;
+
+ if (edges != null)
+ for (E edge : edges)
+ edge_vpairs.remove(edge);
+
+ vertex_data.remove(vertex);
+
+ return true;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof UnorderedTree, ?>))
+ return false;
+
+ UnorderedTree