{ "cells": [ { "cell_type": "code", "execution_count": 14, "id": "df5a1f59", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from collections import Counter" ] }, { "cell_type": "code", "execution_count": 3, "id": "0b47ecac", "metadata": {}, "outputs": [], "source": [ "author = pd.read_csv('../data/ht_class/ht_cleaned_author_df.csv')\n", "paper = pd.read_csv('../data/ht_class/ht_cleaned_paper_df.csv')" ] }, { "cell_type": "code", "execution_count": 6, "id": "d8d8d536", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(52, 27)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# first conference papers\n", "first_conf_papers = paper[paper.Year == min(paper.Year)]\n", "first_conf_papers.shape" ] }, { "cell_type": "code", "execution_count": 12, "id": "a857697a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "118" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# first conf authors\n", "first_conf_authors = author[author.Year == min(author.Year)]\n", "unique_first_conf_authors = list(\n", " set(first_conf_authors['OpenAlex Author ID']))\n", "len(unique_first_conf_authors)" ] }, { "cell_type": "code", "execution_count": 21, "id": "4fead9a0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
YearDOITitleNumber of AuthorsAuthor PositionAuthor NameOpenAlex Author IDAffiliation NameAffiliation Country CodeAffiliation TypeBinary TypeCross-type CollaborationInternational Collaboration
7746199010.1109/VISUAL.1990.146385A journey into the fourth dimension2.02.0E.S. Pandurangahttps://openalex.org/A2302757246department of computer sciences johns hopkins ...USeducationeducationFalseTrue
9955199010.1109/VISUAL.1990.146393A numerical method for rendering spherical ref...3.02.0E.S. Pandurangahttps://openalex.org/A2302757246department of computer sciences johns hopkins ...USeducationeducationFalseFalse
\n", "
" ], "text/plain": [ " Year DOI \\\n", "7746 1990 10.1109/VISUAL.1990.146385 \n", "9955 1990 10.1109/VISUAL.1990.146393 \n", "\n", " Title Number of Authors \\\n", "7746 A journey into the fourth dimension 2.0 \n", "9955 A numerical method for rendering spherical ref... 3.0 \n", "\n", " Author Position Author Name OpenAlex Author ID \\\n", "7746 2.0 E.S. Panduranga https://openalex.org/A2302757246 \n", "9955 2.0 E.S. Panduranga https://openalex.org/A2302757246 \n", "\n", " Affiliation Name \\\n", "7746 department of computer sciences johns hopkins ... \n", "9955 department of computer sciences johns hopkins ... \n", "\n", " Affiliation Country Code Affiliation Type Binary Type \\\n", "7746 US education education \n", "9955 US education education \n", "\n", " Cross-type Collaboration International Collaboration \n", "7746 False True \n", "9955 False False " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this person appeared twice in year 1990\n", "dic = dict(Counter(first_conf_authors['OpenAlex Author ID']))\n", "twice_id = [x for x in dic.keys() if dic[x] > 1]\n", "first_conf_authors[first_conf_authors['OpenAlex Author ID'] == twice_id[0]]" ] }, { "cell_type": "code", "execution_count": 22, "id": "46813419", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Counter({'US': 109, 'DE': 4, 'AU': 3, 'CA': 1, 'FR': 2})" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# first conf author countries\n", "Counter(first_conf_authors['Affiliation Country Code'])" ] }, { "cell_type": "code", "execution_count": 25, "id": "e3fd43f2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "117" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# how many rows miss openalex author id\n", "author[author['OpenAlex Author ID'].isnull()].shape[0]" ] }, { "cell_type": "code", "execution_count": 26, "id": "c70319a5", "metadata": {}, "outputs": [], "source": [ "# total number of unique authors\n", "author_id_nonan = author[author['OpenAlex Author ID'].notnull()]\n", "unique_total_authors = list(set(author_id_nonan['OpenAlex Author ID']))" ] }, { "cell_type": "code", "execution_count": 27, "id": "c4944f5b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6299" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(unique_total_authors)" ] }, { "cell_type": "code", "execution_count": 29, "id": "87ce81d1", "metadata": {}, "outputs": [], "source": [ "# total number of unique countries\n", "unique_cntry_total_authors = list(set(author_id_nonan['Affiliation Country Code']))" ] }, { "cell_type": "code", "execution_count": 30, "id": "2e8d450d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "42" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(unique_cntry_total_authors)" ] }, { "cell_type": "code", "execution_count": null, "id": "ccff5bc3", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }