MaayanLab · u8sand · Feb 14, 2024 · Feb 8, 2024 · Feb 8, 2024 · Feb 13, 2024
diff --git a/appyters/Researcher_Summary_Report_Appyter/appyter.json b/appyters/Researcher_Summary_Report_Appyter/appyter.json
@@ -2,7 +2,7 @@
     "$schema": "https://raw.githubusercontent.com/MaayanLab/appyter-catalog/main/schema/appyter-validator.json",
     "name": "Researcher_Summary_Report_Appyter",
     "title": "Researcher Summary Report",
-    "version": "0.0.2",
+    "version": "0.1.0",
     "description": "An appyter to summarize and display various research information output pertaining to the work of a given researcher.",
     "image": "thumbnail.png",
     "authors": [

diff --git a/appyters/Researcher_Summary_Report_Appyter/requirements.txt b/appyters/Researcher_Summary_Report_Appyter/requirements.txt
@@ -9,4 +9,6 @@ wordcloud
 scholarly
 kaleido
 beautifulsoup4
-Pillow==9.5.0
+Pillow==9.5.0
+google-search-results
+parsel
diff --git a/appyters/Researcher_Summary_Report_Appyter/researcher_summary_appyter.ipynb b/appyters/Researcher_Summary_Report_Appyter/researcher_summary_appyter.ipynb
@@ -34,7 +34,7 @@
     "{% set researcher_name = StringField(\n",
     "    name='researcher_name', \n",
     "    label='Investigator Name', \n",
-    "    default=\"Avi Ma'ayan\", \n",
+    "    default=\"Alan Attie\", \n",
     "    description='Input full name of PI along with middle names/middle initials followed by spaces as leaving it out may change the results of certain information', \n",
     "    section='Data_Section1', \n",
     "    required = True\n",
@@ -108,6 +108,18 @@
     "Entrez.tool = 'Demoscript'"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import ssl\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "# Load settings from a local .env file first so the opt-in flag below can be set there.\n",
+    "load_dotenv()\n",
+    "\n",
+    "# HTTPS certificate verification stays enabled unless APPYTER_ALLOW_INSECURE_SSL is set.\n",
+    "# Swapping the default HTTPS context turns verification off process-wide for every\n",
+    "# connection that relies on it, so treat this as an explicit, temporary workaround only.\n",
+    "if os.environ.get('APPYTER_ALLOW_INSECURE_SSL', '').strip().lower() in ('1', 'true', 'yes'):\n",
+    "    ssl._create_default_https_context = ssl._create_unverified_context"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -250,6 +262,56 @@
     "open_alex_display = getting_information_from_openalex(name_of_researcher_first_and_last, output_folder)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%appyter markdown\n",
+    "## Citations per year for {{researcher_name.raw_value}}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## PUBMED CITATION INFORMATION"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Searching for Citation Information from Pubmed for {}\".format(name_of_researcher_first_and_last))\n",
+    "# The PubMed API call takes pubmed_name in the form: Lastname, First Initials\n",
+    "pmid_citation_dict = query_pubmed_citations(pubmed_name, name_of_researcher_first_and_last)\n",
+    "\n",
+    "# Render the section only when the lookup returned at least one year (None or empty -> skip).\n",
+    "if pmid_citation_dict:\n",
+    "    display(MyMarkdown(\"## Citation Information (from Pubmed)\"))\n",
+    "    # Re-key the counts in ascending year order before handing them to the plot helpers.\n",
+    "    pmid_citation_dict = dict(sorted(pmid_citation_dict.items()))\n",
+    "    fig = make_bar_plot(pmid_citation_dict, 'Year', \"Citations\", \"Citations per Year\", \"Sourced from Pubmed\")\n",
+    "    fig_line = make_line_plot(pmid_citation_dict, 'Year', \"Citations\", \"Cumulative Citations\", \"Sourced from Pubmed\")\n",
+    "\n",
+    "    # Bar chart: show it, save it to the output folder, then emit its numbered caption.\n",
+    "    fig.show()\n",
+    "    fig.write_image(output_folder+'citations_bar_pubmed.png')\n",
+    "    figure_counter = display_figure_labels(output_folder, figure_counter, \"Citations that are connected to the publications each year for {}.\".format(name_of_researcher_first_and_last), title = 'citations_bar_pubmed')\n",
+    "\n",
+    "    # Line chart: same show / save / caption sequence.\n",
+    "    fig_line.show()\n",
+    "    fig_line.write_image(output_folder+'citations_line_graph_pubmed.png')\n",
+    "    figure_counter = display_figure_labels(output_folder, figure_counter, \"The cumulative citations that are connected to the publications each year for {}\".format(name_of_researcher_first_and_last), title = 'citations_line_graph_pubmed')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## SEMANTIC SCHOLAR CITATION INFORMATION"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -264,8 +326,8 @@
     "    display(MyMarkdown(\"## Citation Information (from Semantic Scholar)\"))\n",
     "    display(MyMarkdown(\"The citation information counts is taken from the profile page of the given author name from [Semantic Scholar](https://www.semanticscholar.org/)\"))\n",
     "\n",
-    "    fig = make_bar_plot(semantic_citation_dict,'Year', \"Citations\", f\"Citations per Year\", \"Sourced from Semantic Scholar\")\n",
-    "    fig_line = make_line_plot(semantic_citation_dict, 'Year', \"Citations\", f\"Cumulative Citations\", \"Sourced from Semantic Scholar\")\n",
+    "    fig = make_bar_plot(semantic_citation_dict,'Year of Publication', \"Total Times Cited\", f\"Total Current Citations for Publications' Year\", \"Sourced from Semantic Scholar\")\n",
+    "    fig_line = make_line_plot(semantic_citation_dict, 'Year of Publication', \"Total Cited\", f\"Cumulative Citations\", \"Sourced from Semantic Scholar\")\n",
     "    fig.show()\n",
     "    fig.write_image(output_folder+'citations_bar_semantic.png')\n",
     "    figure_counter = display_figure_labels(output_folder, figure_counter, \"Citations that are connected to the publications each year for {}.\".format(name_of_researcher_first_and_last), title = 'citations_bar_semantic')\n",
@@ -534,7 +596,8 @@
    "outputs": [],
    "source": [
     "GENESHOT_URL = 'https://maayanlab.cloud/geneshot/api/search'\n",
-    "payload = {\"rif\": \"autorif\", \"term\": pubmed_name}\n",
+    "geneshot_name = re.sub('[^A-Za-z0-9 ]+', '', pubmed_name)\n",
+    "payload = {\"rif\": \"autorif\", \"term\": geneshot_name}\n",
     "response = requests.post(GENESHOT_URL, json=payload)\n",
     "gene_count = {}\n",
     "# Setting up the url parameters to go to actual gene shot link\n",
@@ -753,7 +816,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.9.4"
   },
   "vscode": {
    "interpreter": {

diff --git a/appyters/Researcher_Summary_Report_Appyter/utils.py b/appyters/Researcher_Summary_Report_Appyter/utils.py
@@ -544,4 +544,49 @@ def display_summary_text_from_openalex_png(institution = '', interests = [], h_i
         plt.axis('off')
     plt.show()
     return display_list
-
+from Bio import Entrez
+# Setting Entrez tool parameter
+Entrez.email = '[email protected]'
+Entrez.tool = 'Demoscript'
+from urllib.parse import urlencode
+def query_pubmed_citations(pubmed_name, name_of_researcher_first_and_last):
+    params = {
+        'term': "{}".format(pubmed_name)
+    }
+    pmid_citation_dict = defaultdict(int)
+    #Get the pubmed publications for the researcher with the pmids
+    info = Entrez.esearch(db="PubMed", term= pubmed_name, retmax = "5000") 
+    info = Entrez.read(info)
+    identifiers = info['IdList'] # Get list of identifiers which are pmids
+    if len(identifiers) == 0:
+        print("This name does not have any publications to search for citations returned from PubMed.")
+        return pmid_citation_dict
+    else:
+        print("This may take over a minute or two.")
+        display(MyMarkdown("### Link to [PubMed Query](https://pubmed.ncbi.nlm.nih.gov/?{}) for {}".format(urlencode(params), name_of_researcher_first_and_last)))
+        # Use the Entrez module efetch for the publication records for the PMIDs with text information included for each. 
+        records = Entrez.efetch(db="pubmed", id=identifiers, rettype="medline", retmode="text")
+        publications = records.read().split("\n\n")
+        for pub in publications:
+            try:
+                year_published = int(pub.split("DP  - ")[1].split('\n')[0].split()[0].strip()[:4])
+                pmid = pub.split("PMID-")[1].split('\n')[0].strip()
+                handle = Entrez.elink(dbfrom="pubmed", id=pmid, linkname="pubmed_pmc_refs")
+                record = Entrez.read(handle)
+                if len(record[0]["LinkSetDb"]) != 0:
+                    # print(record[0]["LinkSetDb"][0]["Link"])
+                    list_of_ids = []
+                    for id_dict in record[0]["LinkSetDb"][0]["Link"]:
+                        list_of_ids.append(id_dict['Id'])
+                    handle = Entrez.esummary(db="pmc", id=','.join(list_of_ids), retmode="xml")
+                    pub_records = Entrez.parse(handle)
+                    for record in pub_records:
+                        if 'PubDate' in record:
+                            if record['PubDate'][:4].isdigit():
+                                year_article_published = int(record['PubDate'][:4])
+                                pmid_citation_dict[year_article_published] += 1
+            except:
+                continue
+        if len(pmid_citation_dict) == 0:
+            print("This name does not have any publications to search for citations returned from PubMed.")
+        return pmid_citation_dict