Summary of this patch (text before the first "diff --git" line is ignored by patch tools):
  * .github/workflows/publish-to-pypi.yml: reference pypa/gh-action-pypi-publish@master
    instead of @main.
  * scholarly/publication_parser.py: when the citation-page title ends with an
    ellipsis (U+2026), replace it with the text of the first <div> inside the
    'gsc_oci_merged_snippet' div, if both elements are present.
  * setup.py: bump the package version from 1.4.2 to 1.4.3.
  * test_module.py: add test_full_title, which checks the elided title on the
    author's publication entry and the full title after filling the publication.

diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
index f71933de..f02572de 100644
--- a/.github/workflows/publish-to-pypi.yml
+++ b/.github/workflows/publish-to-pypi.yml
@@ -24,6 +24,6 @@ jobs:
 
     - name: Publish package to PyPI
       if: startsWith(github.event.ref, 'refs/tags')
-      uses: pypa/gh-action-pypi-publish@main
+      uses: pypa/gh-action-pypi-publish@master
       with:
         password: ${{ secrets.pypi_password }}
diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index f3f2030d..645ccd59 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -270,6 +270,14 @@ def fill(self, publication: Publication)->Publication:
             url = _CITATIONPUB.format(publication['author_pub_id'])
             soup = self.nav._get_soup(url)
             publication['bib']['title'] = soup.find('div', id='gsc_oci_title').text
+            # A title ending in an ellipsis (U+2026) is elided; prefer the text of
+            # the first <div> inside the merged snippet when that is available.
+            if publication['bib']['title'].endswith('\u2026'):
+                merged_snippet = soup.find('div', class_='gsc_oci_merged_snippet')
+                if merged_snippet:
+                    title_div = merged_snippet.find('div')
+                    if title_div:
+                        publication['bib']['title'] = title_div.text
             if soup.find('a', class_='gsc_oci_title_link'):
                 publication['pub_url'] = soup.find(
                     'a', class_='gsc_oci_title_link')['href']
diff --git a/setup.py b/setup.py
index 65dccf27..d4ea528a 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name='scholarly',
-    version='1.4.2',
+    version='1.4.3',
     author='Steven A. Cholewiak, Panos Ipeirotis, Victor Silva, Arun Kannawadi',
     author_email='steven@cholewiak.com, panos@stern.nyu.edu, vsilva@ualberta.ca, arunkannawadi@astro.princeton.edu',
     description='Simple access to Google Scholar authors and citations',
diff --git a/test_module.py b/test_module.py
index 21a0ef06..fe28e2a0 100644
--- a/test_module.py
+++ b/test_module.py
@@ -298,6 +298,28 @@ def cpy_decoder(di):
         serialized = json.dumps(pub)
         pub_loaded = json.loads(serialized, object_hook=cpy_decoder)
         self.assertEqual(pub, pub_loaded)
+
+    def test_full_title(self):
+        """
+        Test if the full title of a long title-publication gets retrieved.
+        The code under test gets executed if:
+        publication['source'] == PublicationSource.AUTHOR_PUBLICATION_ENTRY
+        so the long title-publication is taken from an author object.
+        """
+        author = scholarly.search_author_id('Xxjj6IsAAAAJ')
+        author = scholarly.fill(author, sections=['publications'])
+        pub_index = -1
+        for i, entry in enumerate(author['publications']):
+            if entry['author_pub_id'] == 'Xxjj6IsAAAAJ:u_35RYKgDlwC':
+                pub_index = i
+        self.assertGreaterEqual(pub_index, 0)
+        # elided title
+        self.assertEqual(author['publications'][pub_index]['bib']['title'],
+                         u'Evaluation of toxicity of Dichlorvos (Nuvan) to fresh water fish Anabas testudineus and possible modulation by crude aqueous extract of Andrographis paniculata: A preliminary …')
+        # full text
+        pub = scholarly.fill(author['publications'][pub_index])
+        self.assertEqual(pub['bib']['title'],
+                         u'Evaluation of toxicity of Dichlorvos (Nuvan) to fresh water fish Anabas testudineus and possible modulation by crude aqueous extract of Andrographis paniculata: A preliminary investigation')
 
 
 if __name__ == '__main__':