diff --git a/parsel/selector.py b/parsel/selector.py
index 282f2e13..be297bef 100644
--- a/parsel/selector.py
+++ b/parsel/selector.py
@@ -98,6 +98,18 @@ def extract_first(self, default=None):
else:
return default
+ def text_content(self):
+ """
+ Call the ``.text_content()`` method for each element in this list and return
+ their results as a list of unicode strings, one per element.
+ """
+ return [x.text_content() for x in self]
+
+ def text_content_first(self, default=None):
+ for x in self:
+ return x.text_content()
+ else:
+ return default
class Selector(object):
"""
@@ -222,6 +234,13 @@ def extract(self):
else:
return six.text_type(self.root)
+ def text_content(self):
+ """
+ Returns the text content of the element, including the text content of
+ its children, with no markup and with whitespace trimmed and collapsed.
+ """
+ return six.text_type(self.root.xpath("normalize-space()"))
+
def register_namespace(self, prefix, uri):
"""
Register the given namespace to be used in this :class:`Selector`.
diff --git a/tests/test_selector.py b/tests/test_selector.py
index 29446d4e..3c0e717b 100644
--- a/tests/test_selector.py
+++ b/tests/test_selector.py
@@ -86,6 +86,61 @@ def test_extract_first_default(self):
self.assertEqual(sel.xpath('//div/text()').extract_first(default='missing'), 'missing')
+ def test_text_content_first(self):
+ """Test if text_content_first() returns first element"""
+ body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
+ sel = self.sscls(text=body)
+
+ self.assertEqual(sel.xpath('//ul/li').text_content_first(),
+ sel.xpath('//ul/li').text_content()[0])
+
+ self.assertEqual(sel.xpath('//ul/li[@id="1"]').text_content_first(),
+ sel.xpath('//ul/li[@id="1"]').text_content()[0])
+
+ self.assertEqual(sel.xpath('//ul/li[2]').text_content_first(),
+ sel.xpath('//ul/li').text_content()[1])
+
+ self.assertEqual(sel.xpath('//ul/li[@id="doesnt-exist"]').text_content_first(), None)
+
+ self.assertEqual(sel.xpath('//ul/li').text_content_first(), '1')
+
+ self.assertEqual(sel.xpath('//ul/li[2]').text_content_first(), '2'),
+
+ self.assertEqual(sel.xpath('//ul').text_content_first(), '12'),
+
+ def test_text_content_first_default(self):
+ """Test if text_content_first() returns default value when no results found"""
+ body = u''
+ sel = self.sscls(text=body)
+
+ self.assertEqual(sel.xpath('//div').text_content_first(default='missing'), 'missing')
+
+ def test_text_content(self):
+ """Test if text_content() returns the text of every matched element"""
+ body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
+ sel = self.sscls(text=body)
+
+ self.assertEqual(sel.xpath('//ul').text_content(), [u'12'])
+ self.assertEqual(sel.xpath('//ul/li').text_content(), [u'1', u'2'])
+
+ def test_text_content_with_spaces(self):
+ """Test if text_content() trims and collapses whitespace inside each element"""
+ body = u"""
+ <p>
+ Mary had a little
+ lamb
+ </p>
+ meh meh
+ <p>
+ It's
+ fleece
+ was white as snow.
+ </p>
+ """
+ sel = self.sscls(text=body)
+
+ self.assertEqual(sel.xpath('//p').text_content(), [u'Mary had a little lamb', u'It\'s fleece was white as snow.'])
+
def test_re_first(self):
"""Test if re_first() returns first matched element"""
body = u''