diff --git a/.gitignore b/.gitignore
index 172c006..3b99ee0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ venv/
 # ignore py compiled etc. files
 *.pyc
 *.pyo
+
+# ignore .idea
+.idea/
diff --git a/config/defaults.py b/config/defaults.py
index cf153d9..c697437 100644
--- a/config/defaults.py
+++ b/config/defaults.py
@@ -84,11 +84,18 @@ MUTPB_EN_OUT_LINK = 0.5  # probability to add an outgoing triple (otherwise in)
 MUTPB_AE = 0.2  # prob to try adding an edge between two nodes
 MUTPB_ID = 0.05  # prob to increase distance between source and target by 1 hop
-MUTPB_FV = 0.4  # prob to fix a variable (SPARQL)
+MUTPB_FV = 0.25  # prob to fix a variable (SPARQL)
 MUTPB_FV_RGTP_SAMPLE_N = 128  # sample <= n remaining GTPs to fix variables for
 MUTPB_FV_SAMPLE_MAXN = 32  # max n of instantiations to sample from top k
 MUTPB_FV_QUERY_LIMIT = 256  # SPARQL query limit for the top k instantiations
 MUTPB_SP = 0.05  # prob to simplify pattern (warning: can restrict exploration)
+# TODO: lower MUTPB_DN
+MUTPB_DN = 0.6  # prob to try adding a deep and narrow path to a pattern
+MUTPB_DN_MAX_HOPS = 10  # max number of hops in the deep narrow path
+MUTPB_DN_MAX_HOPS_ALPHA = 1.15  # alpha value of the path length beta distribution
+MUTPB_DN_MAX_HOPS_BETA = 1.85  # beta value of the path length beta distribution
+MUTPB_DN_AVG_DEG_LIMIT = 10  # max avg. fan-out (reachable nodes) allowed per hop
+MUTPB_DN_MAX_HOP_INST = 10  # max number of hop instantiations used for the next query/ies
 # fusion of target candidates:
 FUSION_SAMPLES_PER_CLASS = 500  # only use up to n training samples per class
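The alpha/beta defaults above steer how long the sampled deep narrow paths are. As a quick tuning aid, a minimal sketch (the helper name is hypothetical, not part of the patch) that empirically checks the path-length distribution induced by a given alpha/beta, using the same sampling expression as mutate_deep_narrow_path below:

import random
from collections import Counter

def sampled_hop_length_pdf(alpha=1.15, beta=1.85, max_hops=10, samples=100000):
    # same sampling expression as in mutate_deep_narrow_path:
    # n = int(random.betavariate(alpha, beta) * max_hops + 1)
    counts = Counter(
        int(random.betavariate(alpha, beta) * max_hops + 1)
        for _ in range(samples)
    )
    return {n: counts[n] / float(samples) for n in sorted(counts)}

# sampled_hop_length_pdf() returns a dict {path_length: relative frequency};
# a smaller alpha or a larger beta shifts the mass towards shorter paths.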
diff --git a/gp_learner.py b/gp_learner.py
index 008310f..d183978 100644
--- a/gp_learner.py
+++ b/gp_learner.py
@@ -54,6 +54,8 @@ from gp_query import query_stats
 from gp_query import query_time_hard_exceeded
 from gp_query import query_time_soft_exceeded
+from gp_query import deep_narrow_path_query
+from gp_query import deep_narrow_path_inst_query
 from gp_query import variable_substitution_query
 from graph_pattern import canonicalize
 from graph_pattern import gen_random_var
@@ -685,6 +687,142 @@ def mutate_fix_var(
     return res
 
 
+def mutate_deep_narrow_path(
+        sparql,
+        timeout,
+        gtp_scores,
+        child,
+        directions=None,
+        child_in_queries=False,
+        limit=None,  # TODO: use a limit for the queries?
+):
+    """Finds n-hop connections from source to target and adds them to a given
+    graph pattern.
+
+    The outline of the mutation is as follows:
+    - If the given GP is not evaluated yet, evaluate it to work on its
+      matching node pairs.
+    - If not passed in, randomly select the path length and the directions
+      of the single hops.
+    - Issue SPARQL queries to find hops (from the source and the target side)
+      whose average fan-out stays below the configured limit. Use at most a
+      configured number of the found hops to search for the next hop.
+      When only one hop is left to find, try to instantiate paths that fit an
+      STP. If such a path is found, its hops are added to the GP. As there can
+      be more than one such path, the mutation returns a list of patterns.
+
+    :param directions: list of directions to use for the hops
+        (1: Source -> Target, -1: Target -> Source,
+        0 (or anything else): choose randomly)
+    :param child_in_queries: if True, add the triples of the given pattern to
+        the queries
+    :param limit: SPARQL query limit
+    :return: list of children to which a deep narrow path was added
+    """
+    if not child.fitness.valid:
+        ev = evaluate(
+            sparql, timeout, gtp_scores, child, run=-1, gen=-1)
+        update_individuals([child], [ev])
+    gtps = child.matching_node_pairs
+    if not gtps:
+        return [child]
+    if directions:
+        n = len(directions) - 1
+    else:
+        alpha = config.MUTPB_DN_MAX_HOPS_ALPHA
+        beta = config.MUTPB_DN_MAX_HOPS_BETA
+        max_hops = config.MUTPB_DN_MAX_HOPS
+        # more likely to create shorter paths
+        # with default values the distribution is as follows:
+        # PDF: 1: 14 %, 2: 27 %, 3: 25 %, 4: 17 %, 5: 10 %, 6: 5 %, 7: 1.5 %, ...
+        # CDF: 1: 14 %, 2: 40 %, 3: 66 %, 4: 83 %, 5: 93 %, 6: 98 %, 7: 99.6 %, ...
+        n = int(random.betavariate(alpha, beta) * max_hops + 1)
+    nodes = [SOURCE_VAR] + [Variable('n%d' % i) for i in range(n)] + [TARGET_VAR]
+    hops = [Variable('p%d' % i) for i in range(n + 1)]
+    if not directions:
+        directions = [0 for _ in range(n + 1)]
+    directions = [
+        random.choice([-1, 1]) if d not in [-1, 1] else d for d in directions
+    ]
+    gp_hops = [
+        # directions[i] == 1 => hop in the direction source -> target
+        GraphPattern([(nodes[i], hops[i], nodes[i + 1])]) if directions[i] == 1
+        # directions[i] == -1 => hop in the direction target -> source
+        else GraphPattern([(nodes[i + 1], hops[i], nodes[i])])
+        for i in range(n + 1)
+    ]
+    # queries to get the first n hops:
+    valueblocks_s = {}
+    valueblocks_t = {}
+    for i in range(n // 2 + 1):
+        if i < int(n/2):
+            t, q_res = deep_narrow_path_query(
+                sparql,
+                timeout,
+                child,
+                hops[i],
+                nodes[i + 1],
+                valueblocks_s,
+                gp_hops[:i + 1],
+                SOURCE_VAR,
+                gp_in=child_in_queries,
+            )
+            if not q_res:
+                return [child]
+            valueblocks_s[hops[i]] = {
+                (hops[i],): random.sample(
+                    [(q_r,) for q_r in q_res],
+                    min(config.MUTPB_DN_MAX_HOP_INST, len(q_res))
+                )
+            }
+        if n - i > i:
+            t, q_res = deep_narrow_path_query(
+                sparql,
+                timeout,
+                child,
+                hops[n - i],
+                nodes[n - i],
+                valueblocks_t,
+                gp_hops[n - i:],
+                TARGET_VAR,
+                gp_in=child_in_queries,
+            )
+            if not q_res:
+                return [child]
+            valueblocks_t[hops[n - i]] = {
+                (hops[n - i],): random.sample(
+                    [(q_r,) for q_r in q_res],
+                    min(config.MUTPB_DN_MAX_HOP_INST, len(q_res))
+                )
+            }
+
+    # query to get the last hop and instantiations that connect source and
+    # target
+    valueblocks = {}
+    valueblocks.update(valueblocks_s)
+    valueblocks.update(valueblocks_t)
+    t, q_res = deep_narrow_path_inst_query(
+        sparql,
+        timeout,
+        child,
+        hops,
+        valueblocks,
+        gp_hops,
+        gp_in=child_in_queries
+    )
+    if not q_res:
+        return [child]
+    res = [
+        child + GraphPattern([
+            (nodes[i], qr[i], nodes[i + 1]) if directions[i] == 1
+            else (nodes[i + 1], qr[i], nodes[i])
+            for i in range(n + 1)
+        ]) for qr in q_res
+    ]
+    return res
+
+
 def mutate_simplify_pattern(gp):
     if len(gp) < 2:
         return gp
@@ -797,6 +935,7 @@ def mutate(
         pb_mv=config.MUTPB_MV,
         pb_sp=config.MUTPB_SP,
         pb_sv=config.MUTPB_SV,
+        pb_dn=config.MUTPB_DN,
 ):
     # mutate patterns:
     # grow: select random identifier and convert them into a var (local)
@@ -835,10 +974,10 @@ def mutate(
         child = canonicalize(child)
         children = mutate_fix_var(sparql, timeout, gtp_scores, child)
     else:
-        children = [child]
-
-
-    # TODO: deep & narrow paths mutation
+        if random.random() < pb_dn:
+            children = mutate_deep_narrow_path(sparql, timeout,
+                                                gtp_scores, child)
+        else:
+            children = [child]
 
     children = {
         c if fit_to_live(c) else orig_child
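Since the direction handling in mutate_deep_narrow_path is easy to misread, here is a small self-contained sketch (illustrative only, with stand-in variables, not part of the patch) of how a directions list is turned into the hop triples that get appended to the child pattern:

from rdflib import Variable

SOURCE_VAR = Variable('source')  # stand-ins for the constants in graph_pattern.py
TARGET_VAR = Variable('target')

def hop_triples(directions):
    # one entry per hop, so n intermediate nodes for n + 1 hops
    n = len(directions) - 1
    nodes = [SOURCE_VAR] + [Variable('n%d' % i) for i in range(n)] + [TARGET_VAR]
    hops = [Variable('p%d' % i) for i in range(n + 1)]
    return [
        (nodes[i], hops[i], nodes[i + 1]) if directions[i] == 1
        else (nodes[i + 1], hops[i], nodes[i])
        for i in range(n + 1)
    ]

# hop_triples([1, -1]) yields (?source ?p0 ?n0) and (?target ?p1 ?n0),
# i.e. the second hop points from the target back to the intermediate node.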
diff --git a/gp_query.py b/gp_query.py
index 0a4618d..1763ece 100644
--- a/gp_query.py
+++ b/gp_query.py
@@ -62,6 +62,8 @@ def __init__(self):
         self.ask_multi_query_count = 0
         self.combined_ask_count_multi_query_count = 0
         self.variable_substitution_query_count = 0
+        self.useful_path_query_count = 0
+        self.useful_path_inst_query_count = 0
         self.predict_query_count = 0
         self.count_query_count = 0
@@ -695,6 +697,144 @@ def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds):
 def _var_subst_res_update(res, update, **_):
     res += update
+
+
+def deep_narrow_path_query(
+        sparql,
+        timeout,
+        graph_pattern,
+        var_to_fix,
+        var_to_count,
+        valueblocks,
+        steps,
+        startvar,
+        avglimit=config.MUTPB_DN_AVG_DEG_LIMIT,
+        gp_in=False,
+        batch_size=None
+):
+    _query_stats.useful_path_query_count += 1
+    # TODO: maybe batch_size = batch_size - 10 * number of valueblocks for hops
+    _values = graph_pattern.matching_node_pairs
+    # TODO: maybe use STPs that aren't well covered yet
+    _ret_val_mapping = {stp: [stp] for stp in graph_pattern.matching_node_pairs}
+    _vars_steps_and_stuff = (
+        var_to_fix, var_to_count, startvar, valueblocks, steps, avglimit, gp_in
+    )
+    return _multi_query(
+        sparql, timeout, graph_pattern, graph_pattern.matching_node_pairs,
+        batch_size, _vars_steps_and_stuff, _values, _ret_val_mapping,
+        _deep_narrow_path_res_init, _deep_narrow_path_chunk_q,
+        _deep_narrow_path_chunk_result_ext, _deep_narrow_path_res_update
+    )
+
+
+# noinspection PyUnusedLocal
+def _deep_narrow_path_res_init(_, **kwds):
+    return []
+
+
+def _deep_narrow_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk):
+    var_to_fix, var_to_count, startvar, _valueblocks, steps, avglimit, gp_in \
+        = _vars_steps_and_stuff
+    valueblocks = {
+        startvar: {
+            (startvar,):
+                [(tup[0],) for tup in values_chunk] if startvar == SOURCE_VAR
+                else [(tup[1],) for tup in values_chunk]
+        }
+    }
+    valueblocks.update(_valueblocks)
+    return gp.to_sparql_deep_narrow_path_query(
+        var_to_fix,
+        var_to_count,
+        valueblocks,
+        steps,
+        startvar,
+        avglimit=avglimit,
+        gp_in=gp_in
+    )
+
+
+# noinspection PyUnusedLocal
+def _deep_narrow_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds):
+    var_to_fix, var_to_count, startvar, _valueblocks, steps, avglimit, gp_in \
+        = _vars_steps_and_stuff
+    chunk_res = []
+    res_rows_path = ['results', 'bindings']
+    bindings = sparql_json_result_bindings_to_rdflib(
+        get_path(q_res, res_rows_path, default=[])
+    )
+    for row in bindings:
+        # TODO: maybe return the avg degree too
+        chunk_res.append(get_path(row, [var_to_fix]))
+    return chunk_res
+
+
+def _deep_narrow_path_res_update(res, update, **_):
+    res += update
+
+
+def deep_narrow_path_inst_query(
+        sparql,
+        timeout,
+        graph_pattern,
+        hop,
+        valueblocks,
+        steps,
+        gp_in=False,
+        batch_size=None
+):
+    _query_stats.useful_path_inst_query_count += 1
+    # TODO: maybe batch_size = batch_size - 10 * number of valueblocks for hops
+    _values = graph_pattern.matching_node_pairs
+    # TODO: maybe use STPs that aren't well covered yet
+    _ret_val_mapping = {stp: [stp] for stp in graph_pattern.matching_node_pairs}
+    _vars_steps_and_stuff = (hop, valueblocks, steps, gp_in)
+    return _multi_query(
+        sparql, timeout, graph_pattern, graph_pattern.matching_node_pairs,
+        batch_size, _vars_steps_and_stuff, _values, _ret_val_mapping,
+        _deep_narrow_path_inst_res_init, _deep_narrow_path_inst_chunk_q,
+        _deep_narrow_path_inst_chunk_result_ext,
+        _deep_narrow_path_inst_res_update
+    )
+
+
+#
noinspection PyUnusedLocal +def _deep_narrow_path_inst_res_init(_, **kwds): + return [] + + +def _deep_narrow_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk): + hop, _valueblocks, steps, gp_in = _vars_steps_and_stuff + valueblocks = { + 'st': { + (SOURCE_VAR, TARGET_VAR): values_chunk + } + } + valueblocks.update(_valueblocks) + return gp.to_sparql_deep_narrow_path_inst_query( + hop, valueblocks, steps, gp_in=gp_in + ) + + +# noinspection PyUnusedLocal +def _deep_narrow_path_inst_chunk_result_ext( + q_res, _vars_steps_and_stuff, _, **kwds +): + hop, _valueblocks, steps, gp_in = _vars_steps_and_stuff + chunk_res = [] + res_rows_path = ['results', 'bindings'] + bindings = sparql_json_result_bindings_to_rdflib( + get_path(q_res, res_rows_path, default=[]) + ) + + for row in bindings: + chunk_res.append([get_path(row, [h]) for h in hop]) + return chunk_res + + +def _deep_narrow_path_inst_res_update(res, update, **_): + res += update def generate_stps_from_gp(sparql, gp): diff --git a/graph_pattern.py b/graph_pattern.py index a483c88..d46f654 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -245,7 +245,7 @@ def canonicalize(gp, shorten_varnames=True): and len(gp.nodes) == len(cgp.nodes) and len(gp.edges) == len(cgp.edges) and sorted(gp.identifier_counts().values()) == - sorted(cgp.identifier_counts().values()) + sorted(cgp.identifier_counts().values()) ): # canonicalization should never change any of the features above, but it # did before (e.g., https://github.com/RDFLib/rdflib/issues/494 ). @@ -636,6 +636,107 @@ def to_sparql_select_query( res = textwrap.dedent(res) return self._sparql_prefix(res) + + def to_sparql_deep_narrow_path_query( + self, + var_to_fix, + var_to_count, + valueblocks, + steps, + startvar, + avglimit=10, + gp_in=False + ): + # TODO: Maybe use a limit + count_var_to_count = Variable('c' + ''.join(var_to_count)) + avg_var_to_count = Variable('avgc' + ''.join(var_to_count)) + res = "SELECT %(vtf)s (AVG(%(cvtc)s) as %(avtc)s) {\n" \ + "SELECT %(stv)s %(vtf)s (COUNT (%(vtc)s) as %(cvtc)s) {\n" \ + "%(val)s" \ + "%(trip)s }\n" \ + "GROUP BY %(stv)s %(vtf)s }\n" \ + "GROUP BY %(vtf)s\n" \ + "HAVING (AVG (%(cvtc)s) < %(avgl)s)" % { + 'vtf': ''.join(var_to_fix.n3()), + 'cvtc': ''.join(count_var_to_count.n3()), + 'avtc': ''.join(avg_var_to_count.n3()), + 'stv': ''.join(startvar.n3()), + 'vtc': ''.join(var_to_count.n3()), + 'val': ''.join([ + 'VALUES (%s) {\n%s }\n' % ( + ' '.join(var.n3() for var in valueblocks[key].keys()[0]), + ''.join(['(%s)\n' % + ' '.join(self.curify(v) for v in vt) + for vt in valueblocks[key][(key,)]]) + ) for key in valueblocks.keys() + ]), + 'trip': ''.join([ + '%s %s %s .\n' % (s.n3(), p.n3(), o.n3()) + for step in steps + for s, p, o in step + ]) + ''.join([ + self._sparql_triples_part(indent=' ') if gp_in else '' + ]), + 'avgl': str(avglimit), + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) + + + def to_sparql_deep_narrow_path_inst_query( + self, + hop, + valueblocks, + steps, + gp_in=False + ): + # TODO: Maybe use a limit + res = "SELECT %(vtf)s (COUNT (?source) as ?cst) {\n" \ + "%(val)s" \ + "%(trip)s }\n" \ + "GROUP BY %(vtf)s\n" \ + "HAVING (COUNT (?source) > 0)" % { + 'vtf': ' '.join([var.n3() for var in hop]), + 'val': ''.join([ + 'VALUES (%s) {\n%s }\n' % ( + ' '.join(var.n3() for var in valueblocks[key].keys()[0]), + ''.join(['(%s)\n' % + ' '.join(self.curify(v) for v in vt) + for vt in valueblocks[key].values()[0]]) + ) for key in valueblocks.keys() + ]), + 'trip': ''.join([ + '%s %s %s .\n' % (s.n3(), 
p.n3(), o.n3()) + for step in steps + for s, p, o in step + ]) + ''.join([ + self._sparql_triples_part(indent=' ') if gp_in else '' + ]), + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) + + def to_sparql_precheck_query( + self, + values, + gp_in=False + ): + res = "SELECT * {\n" \ + "%(val)s\n" \ + "%(trip)s\n" \ + "}\n" \ + "LIMIT 1" % { + 'val': ''.join( + self._sparql_values_part(values=values, indent=' ') + ), + 'trip': ''.join(self._sparql_triples_part(indent=' ')) + + ''.join([ + self._sparql_triples_part(indent=' ') if gp_in else '' + ]), + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) + def to_sparql_ask_query( self, bind=None, @@ -656,9 +757,9 @@ def _sparql_query_pattern_part( ): assert bind is None or isinstance(bind, dict) assert values is None or ( - isinstance(values, dict) and - isinstance(next(six.iterkeys(values)), Iterable) and - isinstance(next(six.itervalues(values)), Iterable) + isinstance(values, dict) and + isinstance(next(six.iterkeys(values)), Iterable) and + isinstance(next(six.itervalues(values)), Iterable) ) res = '' @@ -1042,7 +1143,6 @@ def rate_graph_pattern(self, gp): ] return res - def prune_counts(self, below=2): lns = len(self.identifier_gt_node_sum) ln = len(self.identifier_gt_node_count) @@ -1069,7 +1169,7 @@ def prune_counts(self, below=2): def __str__(self): return '%s: pairs: %d, nodes: %d, Identifier counts:\n' \ - 'Pairs: %s\nNodes: %s' % ( - self.__class__.__name__, len(self.gt_pairs), len(self.nodes), - self.identifier_gt_pair_count, self.identifier_gt_node_count - ) + 'Pairs: %s\nNodes: %s' % ( + self.__class__.__name__, len(self.gt_pairs), len(self.nodes), + self.identifier_gt_pair_count, self.identifier_gt_node_count + ) diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py new file mode 100644 index 0000000..b636735 --- /dev/null +++ b/tests/test_mutate_deep_narrow.py @@ -0,0 +1,204 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +"""Testet die verschiedenen Versionen der mutatete_deep_narrow +""" + +import logging +import numpy as np +import pickle +import random +from collections import defaultdict +from collections import OrderedDict +from os import getenv + +import SPARQLWrapper +from itertools import chain +from splendid import get_path +from splendid import time_func +import socket +import rdflib +from rdflib import BNode +from rdflib import Literal +from rdflib import URIRef +from rdflib import Variable + +from config import SPARQL_ENDPOINT +from gp_learner import evaluate +from gp_learner import mutate_deep_narrow_path +from gp_learner import mutate_fix_var +from gp_learner import update_individuals +from gp_query import calibrate_query_timeout +from gp_query import query_time_hard_exceeded +from gp_query import query_time_soft_exceeded +from graph_pattern import gen_random_var +from graph_pattern import GraphPattern +from graph_pattern import SOURCE_VAR +from graph_pattern import TARGET_VAR +from ground_truth_tools import get_semantic_associations +from ground_truth_tools import split_training_test_set +from gtp_scores import GTPScores +from serialization import print_graph_pattern +from utils import sparql_json_result_bindings_to_rdflib + +logger = logging.getLogger(__name__) + +sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINT) +# sparql = SPARQLWrapper.SPARQLWrapper( +# getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql')) +try: + timeout = max(5, 
calibrate_query_timeout(sparql)) # 5s for warmup +except IOError: + from nose import SkipTest + raise SkipTest( + "Can't establish connection to SPARQL_ENDPOINT:\n %s\n" + "Skipping tests in\n %s" % (sparql.endpoint, __file__)) + +dbr = rdflib.Namespace('http://dbpedia.org/resource/') +owl = rdflib.Namespace('http://www.w3.org/2002/07/owl#') +dbo = rdflib.Namespace('http://dbpedia.org/ontology/') +gold = rdflib.Namespace('http://purl.org/linguistics/gold') +dbt = rdflib.Namespace('http://dbpedia.org/resource/Template:') +dbp = rdflib.Namespace('http://dbpedia.org/property/') + +v = [gen_random_var() for i in range(100)] + +sameAs = owl['sameAs'] +pwl = dbo['wikiPageWikiLink'] +hypernym = gold['hypernym'] +wpUseTemp = dbp['wikiPageUsesTemplate'] + +gp_found = {} +gp_found['1'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (SOURCE_VAR, v[0], v[1]), + (v[1], hypernym, TARGET_VAR) +]) +gp_found['2'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], URIRef('http://dbpedia.org/dbtax/Page')) +]) +gp_found['3'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Sister_project_links']) +]) +gp_found['4'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, wpUseTemp, dbt['Pp-semi-indef']) +]) +gp_found['5'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], dbt['Pp-semi-indef']) +]) +gp_found['6'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Cite_book']) +]) +gp_found['7'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Redirect']) +]) +gp_found['8'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR) +]) +gp_found['50'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Use_dmy_dates']) +]) +gp_found['51'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Refend']) +]) +gp_found['52'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), + URIRef('http://dbpedia.org/dbtax/Page')) +]) +gp_found['54'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR), + (v[0], sameAs, SOURCE_VAR) +]) +gp_found['55'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR), + (TARGET_VAR, pwl, SOURCE_VAR) +]) +gp_found['67'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Portal']) +]) +gp_found['68'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Convert']) +]) +gp_found['69'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR), + (v[0], hypernym, SOURCE_VAR) +]) +gp_found['72'] = GraphPattern([ + (SOURCE_VAR, URIRef('http://purl.org/dc/terms/subject'), v[1]), + (TARGET_VAR, pwl, SOURCE_VAR), + (v[0], sameAs, v[1]), + (v[1], URIRef('http://www.w3.org/2004/02/skos/core#subject'), TARGET_VAR) +]) +gp_found['94'] = GraphPattern([ + (SOURCE_VAR, URIRef('http://purl.org/dc/terms/subject'), v[1]), + (TARGET_VAR, v[0], SOURCE_VAR), + (v[1], URIRef('http://www.w3.org/2004/02/skos/core#subject'), TARGET_VAR) +]) +gp_found['131'] = GraphPattern([ + (SOURCE_VAR, v[0], v[2]), + (TARGET_VAR, pwl, v[1]), + (v[2], URIRef('http://www.w3.org/2004/02/skos/core#subject'), TARGET_VAR), +]) +gp_found['140'] = GraphPattern([ + (TARGET_VAR, pwl, 
SOURCE_VAR), + (TARGET_VAR, wpUseTemp, dbt['Other_uses']), + (TARGET_VAR, wpUseTemp, dbt['Pp-move-indef']), + (v[0], URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso'), TARGET_VAR), +]) +# Bis hier jedes mit neuem Fingerprint, jetzt noch 3 vom Rest +gp_found['231'] = GraphPattern([ + (SOURCE_VAR, dbo['class'], TARGET_VAR), + (TARGET_VAR, dbp['subdivisionRanks'], v[0]) +]) +gp_found['323'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (v[0], dbp['species'], TARGET_VAR), + (v[1], dbo['wikiPageDisambiguates'], TARGET_VAR) +]) +gp_found['516'] = GraphPattern([ + (SOURCE_VAR, pwl, v[1]), + (TARGET_VAR, dbp['image'], v[0]), + (v[1], hypernym, TARGET_VAR), + (v[2], dbo['wikiPageRedirects'], SOURCE_VAR) +]) + + +def main(): + ground_truth_pairs = get_semantic_associations() + ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs) + # ground_truth_pairs = ground_truth_pairs[:100] + gtp_scores = GTPScores(ground_truth_pairs) + res = [] + for i in range(100): + key = random.choice(gp_found.keys()) + gp_ = gp_found[key] + # eval_gp(gtp_scores, gp_) + r = mutate_deep_narrow_path(sparql, timeout, gtp_scores, gp_) + logger.info(i) + logger.info(r) + res.append(r) + + +if __name__ == '__main__': + main() diff --git a/tests/test_sampling.py b/tests/test_sampling.py new file mode 100644 index 0000000..044449d --- /dev/null +++ b/tests/test_sampling.py @@ -0,0 +1,398 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +"""Tested das bauen von graph_pattern per gesampeltem finden von 1-hop wegen +und fix-var-mutation +""" + +import logging +import random +from collections import defaultdict +from collections import OrderedDict +from os import getenv + +import SPARQLWrapper +from splendid import get_path +from splendid import time_func +import socket +import rdflib +from rdflib import BNode +from rdflib import Literal +from rdflib import URIRef +from rdflib import Variable + +from config import SPARQL_ENDPOINT +from gp_learner import evaluate +from gp_learner import mutate_fix_var +from gp_learner import update_individuals +from gp_query import calibrate_query_timeout +from gp_query import query_time_hard_exceeded +from gp_query import query_time_soft_exceeded +from graph_pattern import GraphPattern +from graph_pattern import SOURCE_VAR +from graph_pattern import TARGET_VAR +from ground_truth_tools import get_semantic_associations +from ground_truth_tools import split_training_test_set +from gtp_scores import GTPScores +from serialization import print_graph_pattern +from utils import sparql_json_result_bindings_to_rdflib + +logger = logging.getLogger(__name__) + +sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINT) +# sparql = SPARQLWrapper.SPARQLWrapper( +# getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql')) +try: + timeout = max(5, calibrate_query_timeout(sparql)) # 5s for warmup +except IOError: + from nose import SkipTest + raise SkipTest( + "Can't establish connection to SPARQL_ENDPOINT:\n %s\n" + "Skipping tests in\n %s" % (sparql.endpoint, __file__)) + +dbp = rdflib.Namespace('http://dbpedia.org/resource/') +owl = rdflib.Namespace('http://www.w3.org/2002/07/owl#') + +a = Variable('a') +b = Variable('b') +c = Variable('c') +d = Variable('d') +e = Variable('e') +f = Variable('f') +v = Variable('v') +w = Variable('w') + +sameAs = owl['sameAs'] + +gp_1 = GraphPattern([ + (SOURCE_VAR, v, TARGET_VAR) +]) + +gp_2 = GraphPattern([ + (SOURCE_VAR, v, TARGET_VAR), + (TARGET_VAR, w, SOURCE_VAR) +]) + +gp_3 = 
GraphPattern([ + (SOURCE_VAR, a, b), + (b, c, d), + (d, e, TARGET_VAR) +]) + +gp_4 = GraphPattern([ + (SOURCE_VAR, a, b), + (b, c, d), + (TARGET_VAR, e, d) +]) + +gp_5 = GraphPattern([ + (SOURCE_VAR, a, c), + (TARGET_VAR, URIRef('http://dbpedia.org/ontology/thumbnail'), d), + (TARGET_VAR, URIRef('http://dbpedia.org/property/image'), b), + (c, URIRef('http://dbpedia.org/ontology/wikiPageWikiLink'), SOURCE_VAR), + (c, URIRef('http://purl.org/linguistics/gold/hypernym'), TARGET_VAR) +]) + +ground_truth_pairs_1 = [ + (dbp['Berlin'], dbp['Germany']), + (dbp['Hamburg'], dbp['Germany']), + (dbp['Kaiserslautern'], dbp['Germany']), + (dbp['Wien'], dbp['Austria']), + (dbp['Insbruck'], dbp['Austria']), + (dbp['Salzburg'], dbp['Austria']), + (dbp['Paris'], dbp['France']), + (dbp['Lyon'], dbp['France']), + (dbp['Amsterdam'], dbp['Netherlands']), + (dbp['Brussels'], dbp['Belgium']), + (dbp['Washington'], dbp['United_States']), + (dbp['Madrid'], dbp['Spain']), + (dbp['Prague'], dbp['Czech_Republic']), + (dbp['Bern'], dbp['Switzerland']), +] + +ground_truth_pairs_2 = get_semantic_associations() +ground_truth_pairs_2, _ = split_training_test_set(ground_truth_pairs_2) +ground_truth_pairs_2 = random.sample(ground_truth_pairs_2, 100) + +ground_truth_pairs_3 = [ + (dbp['Barrister'], dbp['Law']), + (dbp['Christ'], dbp['Jesus']), + (dbp['Pottage'], dbp['Soup']) + ] + +ground_truth_pairs_4 = [ + (dbp['Motorrad_(disambiguation)'], dbp['Bmw_motorcycle']), + (dbp['Horse'], dbp['Saddle']) +] + +gtp_scores_1 = GTPScores(ground_truth_pairs_1) +gtp_scores_2 = GTPScores(ground_truth_pairs_2) +gtp_scores_3 = GTPScores(ground_truth_pairs_3) +gtp_scores_4 = GTPScores(ground_truth_pairs_4) + + +def test_count(gtps, max_out): + # values = {(SOURCE_VAR, TARGET_VAR): gtps} hier besser nur die sources + source_list = [(stp[0], ) for stp in gtps] + values = {(SOURCE_VAR, ): source_list} + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, TARGET_VAR)]) + # SPARQL-Query die über eine Var aus gp1 random samplet + q = gp1.to_sparql_filter_by_count_out_query( + values=values, count_node=b, max_out=max_out, limit=200) + logger.info(q) + t, q_res1 = run_query(q) + logger.info(q_res1) + # Kreiere b_list in der die Ergebnisse für b "gespeichert" sind + # TODO: als Methode, die Listenform (Tupellistenform) der gefundenen + # Bindings zu gewünschten Variablen zurückgibt. 
+ res_rows_path = ['results', 'bindings'] + bind1 = sparql_json_result_bindings_to_rdflib( + get_path(q_res1, res_rows_path, default=[]) + ) + b_list = [] + for row in bind1: + x = get_path(row, [b]) + y = (x, ) + b_list.append(y) + logger.info('orig query took %.4f s, result:\n%s\n', t, b_list) + b_list[:] = [b_l for b_l in b_list if not list_remove_bool(b_l[0])] + b_list = list(set(b_list)) + # Values für die nächste query: b_list + values = {(b, ): b_list} + # Query die über eine var aus gp2 random samplet mit values aus b_list + q = gp2.to_sparql_select_sample_query(values=values, limit=5000) + logger.info(q) + try: + t, q_res2 = run_query(q) + except: + return [] + # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind + bind2 = sparql_json_result_bindings_to_rdflib( + get_path(q_res2, res_rows_path, default=[]) + ) + target_list = [] + for row in bind2: + target_list.append(get_path(row, [TARGET_VAR])) + logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) + # Kreire gtps_2 in der alle gtps, deren targets in target_list enthalten + # sind, "gespeichert" werden + gtps_2 = [] + for t in target_list: + for gtp in gtps: + if t == gtp[1]: + gtps_2.append(gtp) + logger.info(gtps_2) + + # GraphPattern mit gefixten Pfaden aus den gefundenen gtp kreieren: + # TODO: Das ganze als Methode aus einem graph-pattern, den results und + # den stp + gp_list = [] + for row2 in bind2: + for gtp in gtps: + if gtp[1] == get_path(row2, [TARGET_VAR]): + for row1 in bind1: + if get_path(row1, [b]) == get_path(row2, [b]): + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), TARGET_VAR) + ]) + if gp_ not in gp_list: + gp_list.append(gp_) + + # gp3 = GraphPattern([ + # (SOURCE_VAR, a, b), + # (b, c, TARGET_VAR) + # ]) + gtp_scores = GTPScores(gtps) + # gtp_scores2 = GTPScores(gtps_2) + + # # Fixe das pattern über die gefundenen gtps + # mfv2 = [] + # if len(gtps_2) > 1: + # mfv2 = mutate_fix_var(sparql, timeout, gtp_scores2, gp3) + # + # # lasse die gefundenen Pattern einmal durch die fix_var laufen + # mfv = [] + # for gp_mfv2 in mfv2: + # mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp_mfv2) + # for gp_res in mfv_res: + # mfv.append(gp_res) + # + # # evaluiere die so gefundenen Pattern + # res_eval = eval_gp_list(gtp_scores, mfv) + # return res_eval + + # evaluiere die gefixten pattern + res_eval = eval_gp_list(gtp_scores, gp_list) + return res_eval + + +def test_sample(gtps): + values = {(SOURCE_VAR, TARGET_VAR): gtps} + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, TARGET_VAR)]) + # SPARQL-Query die über eine Var aus gp1 random samplet. 
+ # TODO: Query so verändern, dass nach count gefiltert wird (siehe log.txt) + q = gp1.to_sparql_select_sample_query(values=values, limit=100) + logger.info(q) + t, q_res1 = run_query(q) + logger.info(q_res1) + # Kreiere b_list in der die Ergebnisse für b "gespeichert" sind + res_rows_path = ['results', 'bindings'] + bind1 = sparql_json_result_bindings_to_rdflib( + get_path(q_res1, res_rows_path, default=[]) + ) + b_list = [] + for row in bind1: + x = get_path(row, [b]) + y = (x, ) + b_list.append(y) + logger.info('orig query took %.4f s, result:\n%s\n', t, b_list) + b_list[:] = [b_l for b_l in b_list if not list_remove_bool(b_l[0])] + # Values für die nächste query: b_list + values = {(b, ): b_list} + # Query die über eine var aus gp2 random samplet mit values aus b_list + q = gp2.to_sparql_select_sample_query(values=values, limit=5000) + logger.info(q) + t, q_res2 = run_query(q) + # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind + bind2 = sparql_json_result_bindings_to_rdflib( + get_path(q_res2, res_rows_path, default=[]) + ) + target_list = [] + for row in bind2: + target_list.append(get_path(row, [TARGET_VAR])) + logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) + # Kreire gtps_2 in der alle gtps, deren targets in target_list enthalten + # sind, "gespeichert" werden + gtps_2 = [] + for t in target_list: + for gtp in gtps: + if t == gtp[1]: + gtps_2.append(gtp) + logger.info(gtps_2) + + # GraphPattern mit gefixten Pfaden aus den gefundenen gtp kreieren: + # TODO: Das ganze als Methode aus einem graph-pattern, den results und + # den stp + gp_list = [] + for row2 in bind2: + for gtp in gtps: + if gtp[1] == get_path(row2, [TARGET_VAR]): + for row1 in bind1: + if get_path(row1, [b]) == get_path(row2, [b]): + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), TARGET_VAR) + ]) + if gp_ not in gp_list: + gp_list.append(gp_) + + # gp3 = GraphPattern([ + # (SOURCE_VAR, a, b), + # (b, c, TARGET_VAR) + # ]) + gtp_scores = GTPScores(gtps) + # gtp_scores2 = GTPScores(gtps_2) + + # # Fixe das pattern über die gefundenen gtps + # mfv2 = [] + # if len(gtps_2) > 1: + # mfv2 = mutate_fix_var(sparql, timeout, gtp_scores2, gp3) + # + # # lasse die gefundenen Pattern einmal durch die fix_var laufen + # mfv = [] + # for gp_mfv2 in mfv2: + # mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp_mfv2) + # for gp_res in mfv_res: + # mfv.append(gp_res) + # + # # evaluiere die so gefundenen Pattern + # res_eval = eval_gp_list(gtp_scores, mfv) + # return res_eval + + # evaluiere die gefixten pattern + res_eval = eval_gp_list(gtp_scores, gp_list) + return res_eval + + +# Runs a given (as String) query against the Sparql-endpoint +def run_query(q): + try: + q_short = ' '.join((line.strip() for line in q.split('\n'))) + sparql.setQuery(q_short) + cal = time_func(sparql.queryAndConvert) + except socket.timeout: + cal = (timeout, {}) + except ValueError: + # e.g. 
if the endpoint gives us bad JSON for some unicode chars
+        logger.info(
+            'Could not parse result for query, assuming empty result...\n'
+            'Query:\n%s\nException:', q,
+            exc_info=1,  # appends exception to message
+        )
+        cal = (timeout, {})
+    return cal
+
+
+# Checks if a found RDF term can be used as a value in a new query
+# (without conflicts)
+def list_remove_bool(var):
+    if isinstance(var, Literal):
+        i_n3 = var.n3()
+        if len(i_n3) > 60:
+            return True
+    elif isinstance(var, BNode):
+        return True
+    # really ugly, but the only way I found to avoid problems with the
+    # Category:Cigarettes example (see docs)
+    # TODO: possibly make sure these are not thrown out but just not
+    # shortened with a prefix, i.e. check that curify does what it is
+    # supposed to do
+    elif isinstance(var, URIRef):
+        return ':' in var[7:]
+    return False
+
+
+# evaluates a given list of graph patterns
+def eval_gp_list(gtp_scores, gp_list):
+    for gp_l in gp_list:
+        res_ev = evaluate(
+            sparql, timeout, gtp_scores, gp_l, run=0, gen=0)
+        update_individuals([gp_l], [res_ev])
+        # print_graph_pattern(gp_, print_matching_node_pairs=0)
+    return gp_list
+
+
+if __name__ == '__main__':
+    # # test_sample:
+    # res = []
+    # for i in range(10):
+    #     res_ts = test_sample(ground_truth_pairs_2)
+    #     for gp_ts in res_ts:
+    #         res.append(gp_ts)
+    #
+    # res = sorted(res, key=lambda gp_: -gp_.fitness.values.score)
+    # for res_ in res:
+    #     print_graph_pattern(res_)
+
+    # test_count
+    res = []
+    for i in range(1):
+        ground_truth_pairs_5 = get_semantic_associations()
+        ground_truth_pairs_5 = random.sample(ground_truth_pairs_5, 200)
+        max_out_steps = [10, 15, 20, 25, 30, 40, 50, 75, 100]
+        for j in max_out_steps:
+            res_ts = test_count(ground_truth_pairs_5, j)
+            for gp_ts in res_ts:
+                res.append((gp_ts, j))
+
+    res = sorted(res, key=lambda gp_: -gp_[0].fitness.values.score)
+    res = res[0:100]
+    for res_ in res:
+        print('max_out:' + str(res_[1]))
+        print_graph_pattern(res_[0])
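Finally, a minimal usage sketch that borrows the fixtures from tests/test_mutate_deep_narrow.py above (sparql, timeout, gp_found and the imported helpers); it is not part of either test module and shows how the new mutation can be driven with explicit hop directions instead of the randomly sampled ones:

def demo_fixed_directions():
    # force a 2-hop path around the plain hypernym pattern gp_found['8'];
    # 1 means source -> target, -1 means target -> source, so [1, -1]
    # yields the triples (?source ?p0 ?n0) and (?target ?p1 ?n0)
    ground_truth_pairs = get_semantic_associations()
    ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs)
    gtp_scores = GTPScores(ground_truth_pairs)
    children = mutate_deep_narrow_path(
        sparql, timeout, gtp_scores, gp_found['8'],
        directions=[1, -1],     # one entry per hop, so path length 2
        child_in_queries=True,  # also include the pattern's own triples
    )
    for child in children:
        print_graph_pattern(child, print_matching_node_pairs=0)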