Skip to content

Commit

Permalink
Merge pull request #94 from dice-group/develop
Browse files Browse the repository at this point in the history
Develop - release 0.3
  • Loading branch information
gsjunior86 authored Jan 11, 2019
2 parents 11c4ace + dfd6c3f commit f5e798c
Show file tree
Hide file tree
Showing 49 changed files with 1,015 additions and 679 deletions.
4 changes: 2 additions & 2 deletions Dockerfile.frontier
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ FROM openjdk:8u151-jdk
RUN apt-get update && apt-get install -y netcat

COPY ./squirrel.frontier/target/squirrel.frontier.jar /data/squirrel/squirrel.jar
COPY ./spring-config/default-config.xml /data/squirrel/default-config.xml
COPY ./spring-config/ /data/squirrel/spring-config
WORKDIR /data/squirrel

#ADD entrypoint.sh /entrypoint.sh
#RUN chmod +x /entrypoint.sh

VOLUME ["/var/squirrel/data"]

CMD java -cp squirrel.jar:. org.hobbit.core.run.ComponentStarter org.aksw.simba.squirrel.components.FrontierComponent
CMD java -cp squirrel.jar:. org.hobbit.core.run.ComponentStarter org.dice_research.squirrel.components.FrontierComponent
16 changes: 16 additions & 0 deletions Dockerfile.web
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Image for the Squirrel web application (squirrel.web module).
FROM openjdk:8u151-jdk

# Install netcat in a single layer and drop the apt package lists afterwards so
# they do not end up in the image.
# NOTE(review): netcat is presumably needed by the (currently disabled)
# entrypoint.sh below to wait for other services - confirm before removing.
RUN apt-get update \
    && apt-get install -y --no-install-recommends netcat \
    && rm -rf /var/lib/apt/lists/*

# Application jar, the pre-repackaging jar next to it, and the web resources.
COPY ./squirrel.web/target/squirrel.web.jar /data/squirrel/squirrel.web.jar
COPY ./squirrel.web/target/squirrel.web.jar.original /data/squirrel/squirrel.web.jar.original
COPY ./squirrel.web/WEB-INF /data/squirrel/WEB-INF
WORKDIR /data/squirrel

#ADD entrypoint.sh /entrypoint.sh
#RUN chmod +x /entrypoint.sh

VOLUME ["/var/squirrel/data"]

# Exec form makes the JVM PID 1, so it receives SIGTERM on `docker stop`.
# The jar is started with -jar, the same way docker-compose-web.yml starts this
# image; the presence of squirrel.web.jar.original indicates a repackaged
# (executable) jar whose application classes are not on the plain classpath.
CMD ["java", "-jar", "squirrel.web.jar"]

2 changes: 1 addition & 1 deletion Dockerfile.worker
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM openjdk:8u151-jdk
RUN apt-get update && apt-get install -y netcat

COPY ./squirrel.worker/target/squirrel.worker.jar /data/squirrel/squirrel.jar
COPY ./spring-config/default-config.xml /data/squirrel/default-config.xml
COPY ./spring-config /data/squirrel/spring-config
WORKDIR /data/squirrel

#ADD entrypoint.sh /entrypoint.sh
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
default: build

build:
docker-compose -f docker-compose-sparql.yml down
docker-compose -f docker-compose.yml down
mvn clean install -U -DskipTests -Dmaven.javadoc.skip=true

dockerize:
docker build -f Dockerfile.frontier -t squirrel.frontier .
docker build -f Dockerfile.worker -t squirrel.worker .
docker build -f Dockerfile.web -t squirrel.web .

start: dockerize
docker-compose -f docker-compose-sparql.yml up
Expand Down
15 changes: 15 additions & 0 deletions build-squirrel
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Builds every Squirrel Maven module and then the frontier, worker and web
# Docker images.
#
# Run from the repository root: the script enters ./squirrel.web-api and uses
# the current directory as the Docker build context.
#
# Abort on the first failing command, on unset variables and on failures inside
# pipelines, so a broken Maven build never produces images from stale jars.
set -euo pipefail

echo "Building Squirrel..."
# squirrel.web-api is built on its own first (tests included). The subshell
# keeps the script's working directory unchanged even if the build fails.
(
    cd squirrel.web-api
    mvn clean install
)
mvn clean install -DskipTests
# `clear` fails when no terminal is attached (e.g. in CI); that must not abort
# the build under `set -e`.
clear || true

echo "Creating Frontier image..."
docker build -f Dockerfile.frontier -t squirrel.frontier .
echo "Creating Worker image..."
docker build -f Dockerfile.worker -t squirrel.worker .
echo "Creating Web image..."
docker build -f Dockerfile.web -t squirrel.web .
clear || true
echo "Finished"
4 changes: 2 additions & 2 deletions docker-compose-sparql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ services:
DEDUPLICATION_ACTIVE: "true"
HOBBIT_RABBIT_HOST: rabbit
OUTPUT_FOLDER: /var/squirrel/data
RDB_HOST_NAME: rethinkdb
RDB_PORT: 28015
MDB_HOST_NAME: mongodb
MDB_PORT: 27017
SPARQL_HOST_NAME: sparqlhost
SPARQL_HOST_PORT: 3030
SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
Expand Down
39 changes: 21 additions & 18 deletions docker-compose-sparql-web.yml → docker-compose-web.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ services:
# ports:

frontier:
image: squirrel:latest
image: squirrel.frontier:latest
container_name: frontier
environment:
- HOBBIT_RABBIT_HOST=rabbit
- SEED_FILE=/var/squirrel/seeds.txt
- URI_WHITELIST_FILE=/var/squirrel/whitelist.txt
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
- COMMUNICATION_WITH_WEBSERVICE=true
- VISUALIZATION_OF_CRAWLED_GRAPH=true
volumes:
Expand All @@ -36,10 +36,13 @@ services:
command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.dice_research.squirrel.components.FrontierComponent

web:
image: squirrel/webimage:latest
image: squirrel.web:latest
container_name: web
environment:
- HOST=rabbit
ports:
- "8080:8080"
command: java -jar squirrel.web.jar

sparqlhost:
image: stain/jena-fuseki
Expand Down Expand Up @@ -70,19 +73,19 @@ services:
- "5672:5672"

worker1:
image: squirrel:latest
image: squirrel.worker:latest
container_name: worker1
environment:
- HOBBIT_RABBIT_HOST=rabbit
- OUTPUT_FOLDER=/var/squirrel/data
- HTML_SCRAPER_YAML_PATH=/var/squirrel/yaml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context-sparqlStoreBased.xml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context.xml
- SPARQL_HOST_NAME=sparqlhost
#- CKAN_WHITELIST_FILE=/var/squirrel/ckanwhitelist.txt
- SPARQL_HOST_PORT=3030
- DEDUPLICATION_ACTIVE=true
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
#-CKAN_PORT=
volumes:
- ./data/worker1:/var/squirrel/data
Expand All @@ -92,19 +95,19 @@ services:
command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter

worker2:
image: squirrel:latest
image: squirrel.worker:latest
container_name: worker2
environment:
- HOBBIT_RABBIT_HOST=rabbit
- OUTPUT_FOLDER=/var/squirrel/data
- HTML_SCRAPER_YAML_PATH=/var/squirrel/yaml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context-sparqlStoreBased.xml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context.xml
- SPARQL_HOST_NAME=sparqlhost
#- CKAN_WHITELIST_FILE=/var/squirrel/ckanwhitelist.txt
- SPARQL_HOST_PORT=3030
- DEDUPLICATION_ACTIVE=true
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
#-CKAN_PORT=
volumes:
- ./data/worker2:/var/squirrel/data
Expand All @@ -114,19 +117,19 @@ services:
command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter

worker3:
image: squirrel:latest
image: squirrel.worker:latest
container_name: worker3
environment:
- HOBBIT_RABBIT_HOST=rabbit
- OUTPUT_FOLDER=/var/squirrel/data
- HTML_SCRAPER_YAML_PATH=/var/squirrel/yaml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context-sparqlStoreBased.xml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context.xml
- SPARQL_HOST_NAME=sparqlhost
#- CKAN_WHITELIST_FILE=/var/squirrel/ckanwhitelist.txt
- SPARQL_HOST_PORT=3030
- DEDUPLICATION_ACTIVE=true
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
#-CKAN_PORT=
volumes:
- ./data/worker3:/var/squirrel/data
Expand All @@ -142,8 +145,8 @@ services:
DEDUPLICATION_ACTIVE: "true"
HOBBIT_RABBIT_HOST: rabbit
OUTPUT_FOLDER: /var/squirrel/data
RDB_HOST_NAME: rethinkdb
RDB_PORT: 28015
MDB_HOST_NAME: mongodb
MDB_PORT: 27017
SPARQL_HOST_NAME: sparqlhost
SPARQL_HOST_PORT: 3030
SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
Expand Down
88 changes: 54 additions & 34 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,30 @@ services:
container_name: frontier
environment:
- HOBBIT_RABBIT_HOST=rabbit
- SEED_FILE=/var/squirrel/seeds.txt
- URI_WHITELIST_FILE=/var/squirrel/whitelist.txt
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- SEED_FILE=/var/squirrel/seeds.txt
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
- COMMUNICATION_WITH_WEBSERVICE=false
- VISUALIZATION_OF_CRAWLED_GRAPH=false
- JVM_ARGS=-Xmx8g
volumes:
- ./data/frontier:/var/squirrel/data
- ./seed/seeds.txt:/var/squirrel/seeds.txt:ro
- ./whitelist/ckanwhitelist.txt:/var/squirrel/whitelist.txt:ro
# command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.dice_research.squirrel.components.FrontierComponent
- ./whitelist/whitelist.txt:/var/squirrel/whitelist.txt:ro
command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.dice_research.squirrel.components.FrontierComponent

sparqlhost:
image: stain/jena-fuseki
container_name: sparqlhost
ports:
- "3030:3030"
volumes:
- ./data/sparqlhost/sparqlhost_data:/fuseki
environment:
- ADMIN_PASSWORD=pw123
- JVM_ARGS=-Xmx2g

mongodb:
image: mongo:4.0.0
volumes:
Expand All @@ -39,7 +50,8 @@ services:
volumes:
- ./data/rethinkdb:/data
ports:
- "28015:28015"
- "8080:8080"
command: rethinkdb --bind all

# message bus
rabbit:
Expand All @@ -58,15 +70,18 @@ services:
- HOBBIT_RABBIT_HOST=rabbit
- OUTPUT_FOLDER=/var/squirrel/data
- HTML_SCRAPER_YAML_PATH=/var/squirrel/yaml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context-fileBased.xml
- DEDUPLICATION_ACTIVE=true
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context.xml
- SPARQL_HOST_NAME=sparqlhost
- SPARQL_HOST_PORT=3030
- DEDUPLICATION_ACTIVE=false
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
- JVM_ARGS=-Xmx8g
volumes:
- ./data/worker1:/var/squirrel/data
- ./yaml:/var/squirrel/yaml
- ./spring-config:/var/squirrel/spring-config
# command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter
command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter

worker2:
image: squirrel.worker:latest
Expand All @@ -76,14 +91,17 @@ services:
- OUTPUT_FOLDER=/var/squirrel/data
- HTML_SCRAPER_YAML_PATH=/var/squirrel/yaml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context-fileBased.xml
- DEDUPLICATION_ACTIVE=true
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- SPARQL_HOST_NAME=sparqlhost
- SPARQL_HOST_PORT=3030
- DEDUPLICATION_ACTIVE=false
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
- JVM_ARGS=-Xmx8g
volumes:
- ./data/worker2:/var/squirrel/data
- ./yaml:/var/squirrel/yaml
- ./spring-config:/var/squirrel/spring-config
# command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter
command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter

worker3:
image: squirrel.worker:latest
Expand All @@ -93,28 +111,30 @@ services:
- OUTPUT_FOLDER=/var/squirrel/data
- HTML_SCRAPER_YAML_PATH=/var/squirrel/yaml
- CONTEXT_CONFIG_FILE=/var/squirrel/spring-config/context-fileBased.xml
- SPARQL_HOST_NAME=sparqlhost
- SPARQL_HOST_PORT=3030
- DEDUPLICATION_ACTIVE=true
- RDB_HOST_NAME=rethinkdb
- RDB_PORT=28015
- MDB_HOST_NAME=mongodb
- MDB_PORT=27017
- JVM_ARGS=-Xmx8g
volumes:
- ./data/worker3:/var/squirrel/data
- ./yaml:/var/squirrel/yaml
- ./spring-config:/var/squirrel/spring-config
# command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter

# deduplicator:
# image: squirrel
# container_name: deduplicator
# environment:
# DEDUPLICATION_ACTIVE: "true"
# HOBBIT_RABBIT_HOST: rabbit
# OUTPUT_FOLDER: /var/squirrel/data
# RDB_HOST_NAME: rethinkdb
# RDB_PORT: 28015
# SPARQL_HOST_NAME: sparqlhost
# SPARQL_HOST_PORT: 3030
# SERVICE_PRECONDITION: "rethinkdb:28015 rabbit:5672"
# volumes:
# - ./data/deduplicator:/var/squirrel/data
# command: java -cp squirrel.jar org.hobbit.core.run.ComponentStarter org.dice_research.squirrel.components.DeduplicatorComponent
command: java -cp squirrel.jar org.dice_research.squirrel.components.WorkerComponentStarter

# Deduplicator component: started through the HOBBIT ComponentStarter and
# connected to the same RabbitMQ, MongoDB and SPARQL hosts as the workers.
deduplicator:
  # NOTE(review): the build targets in this commit only produce
  # squirrel.frontier, squirrel.worker and squirrel.web images - confirm that
  # an image named "squirrel" containing squirrel.jar still exists.
  image: squirrel
  container_name: deduplicator
  environment:
    DEDUPLICATION_ACTIVE: "true"
    HOBBIT_RABBIT_HOST: rabbit
    OUTPUT_FOLDER: /var/squirrel/data
    MDB_HOST_NAME: mongodb
    # Environment values are strings; quote the numeric ones explicitly.
    MDB_PORT: "27017"
    SPARQL_HOST_NAME: sparqlhost
    SPARQL_HOST_PORT: "3030"
    # Wait for the services this component is actually configured to use
    # (MongoDB replaced RethinkDB as the configured database host).
    SERVICE_PRECONDITION: "mongodb:27017 rabbit:5672"
  volumes:
    - ./data/deduplicator:/var/squirrel/data
  # Uses the org.dice_research.squirrel package, like the frontier and worker
  # commands; the old org.aksw.simba.squirrel package was renamed.
  command: >-
    java -cp squirrel.jar org.hobbit.core.run.ComponentStarter
    org.dice_research.squirrel.components.DeduplicatorComponent
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.dice-research</groupId>
<artifactId>squirrel</artifactId>
<version>0.3.0-SNAPSHOT</version>
<version>0.3.0</version>
<packaging>pom</packaging>
<inceptionYear>2017</inceptionYear>
<name>Squirrel</name>
Expand Down Expand Up @@ -33,12 +33,12 @@

<!-- MODULES -->
<modules>
<module>squirrel.web-api</module>
<module>squirrel.web</module>
<module>squirrel.api</module>
<module>squirrel.deduplication</module>
<module>squirrel.frontier</module>
<module>squirrel.mockup</module>
<module>squirrel.web</module>
<module>squirrel.web-api</module>
<module>squirrel.worker</module>
</modules>

Expand Down
2 changes: 1 addition & 1 deletion seed/seeds.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
https://dbpedia.org/resource/New_York
https://dbpedia.org/resource/Moscow
https://dbpedia.org/resource/Brazil
https://dbpedia.org/resource/China
Loading

0 comments on commit f5e798c

Please sign in to comment.