https://github.com/hongtaoh/32vis
Tip revision: 9960413711b0efb1f51ff7cce3548d259be8d8cb authored by Hongtao Hao on 24 May 2025, 20:13:11 UTC
Update README.md
Update README.md
Tip revision: 9960413
Introduction.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "df5a1f59",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from collections import Counter"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0b47ecac",
"metadata": {},
"outputs": [],
"source": [
"author = pd.read_csv('../data/ht_class/ht_cleaned_author_df.csv')\n",
"paper = pd.read_csv('../data/ht_class/ht_cleaned_paper_df.csv')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d8d8d536",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(52, 27)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# first conference papers\n",
"first_conf_papers = paper[paper.Year == min(paper.Year)]\n",
"first_conf_papers.shape"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "a857697a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"118"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# first conf authors\n",
"first_conf_authors = author[author.Year == min(author.Year)]\n",
"unique_first_conf_authors = list(\n",
" set(first_conf_authors['OpenAlex Author ID']))\n",
"len(unique_first_conf_authors)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "4fead9a0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Year</th>\n",
" <th>DOI</th>\n",
" <th>Title</th>\n",
" <th>Number of Authors</th>\n",
" <th>Author Position</th>\n",
" <th>Author Name</th>\n",
" <th>OpenAlex Author ID</th>\n",
" <th>Affiliation Name</th>\n",
" <th>Affiliation Country Code</th>\n",
" <th>Affiliation Type</th>\n",
" <th>Binary Type</th>\n",
" <th>Cross-type Collaboration</th>\n",
" <th>International Collaboration</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7746</th>\n",
" <td>1990</td>\n",
" <td>10.1109/VISUAL.1990.146385</td>\n",
" <td>A journey into the fourth dimension</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>E.S. Panduranga</td>\n",
" <td>https://openalex.org/A2302757246</td>\n",
" <td>department of computer sciences johns hopkins ...</td>\n",
" <td>US</td>\n",
" <td>education</td>\n",
" <td>education</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9955</th>\n",
" <td>1990</td>\n",
" <td>10.1109/VISUAL.1990.146393</td>\n",
" <td>A numerical method for rendering spherical ref...</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>E.S. Panduranga</td>\n",
" <td>https://openalex.org/A2302757246</td>\n",
" <td>department of computer sciences johns hopkins ...</td>\n",
" <td>US</td>\n",
" <td>education</td>\n",
" <td>education</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Year DOI \\\n",
"7746 1990 10.1109/VISUAL.1990.146385 \n",
"9955 1990 10.1109/VISUAL.1990.146393 \n",
"\n",
" Title Number of Authors \\\n",
"7746 A journey into the fourth dimension 2.0 \n",
"9955 A numerical method for rendering spherical ref... 3.0 \n",
"\n",
" Author Position Author Name OpenAlex Author ID \\\n",
"7746 2.0 E.S. Panduranga https://openalex.org/A2302757246 \n",
"9955 2.0 E.S. Panduranga https://openalex.org/A2302757246 \n",
"\n",
" Affiliation Name \\\n",
"7746 department of computer sciences johns hopkins ... \n",
"9955 department of computer sciences johns hopkins ... \n",
"\n",
" Affiliation Country Code Affiliation Type Binary Type \\\n",
"7746 US education education \n",
"9955 US education education \n",
"\n",
" Cross-type Collaboration International Collaboration \n",
"7746 False True \n",
"9955 False False "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# this person appeared twice in year 1990\n",
"dic = dict(Counter(first_conf_authors['OpenAlex Author ID']))\n",
"twice_id = [x for x in dic.keys() if dic[x] > 1]\n",
"first_conf_authors[first_conf_authors['OpenAlex Author ID'] == twice_id[0]]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "46813419",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Counter({'US': 109, 'DE': 4, 'AU': 3, 'CA': 1, 'FR': 2})"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# first conf author countries\n",
"Counter(first_conf_authors['Affiliation Country Code'])"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "e3fd43f2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"117"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# how many rows miss openalex author id\n",
"author[author['OpenAlex Author ID'].isnull()].shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c70319a5",
"metadata": {},
"outputs": [],
"source": [
"# total number of unique authors\n",
"author_id_nonan = author[author['OpenAlex Author ID'].notnull()]\n",
"unique_total_authors = list(set(author_id_nonan['OpenAlex Author ID']))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "c4944f5b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6299"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(unique_total_authors)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "87ce81d1",
"metadata": {},
"outputs": [],
"source": [
"# total number of unique countries\n",
"unique_cntry_total_authors = list(set(author_id_nonan['Affiliation Country Code']))"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "2e8d450d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"42"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(unique_cntry_total_authors)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ccff5bc3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
