https://github.com/hamzeiehsan/leisure-walking-analysis
Tip revision: 22c33b5a1a32915a600817db808f716df265b70b authored by hamzeiehsan on 27 April 2025, 10:08:03 UTC
add link to annotation tool
add link to annotation tool
Tip revision: 22c33b5
4_annotation_agreement.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"# sys\n",
"import sys\n",
"\n",
"# warning off\n",
"import warnings\n",
"# IO\n",
"import json\n",
"\n",
"# logging\n",
"from loguru import logger\n",
"\n",
"# set logger level\n",
"logger.remove(0)\n",
"logger.add(sys.stderr, level=\"INFO\")\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 283,
"metadata": {},
"outputs": [],
"source": [
"# reading annotation files\n",
"with open('dataset/annotations.json') as fp:\n",
" raw_annotations = json.load(fp)\n",
"\n",
"with open('dataset/annotated-osm-entities.json') as fp:\n",
" second_annotator = json.load(fp)"
]
},
{
"cell_type": "code",
"execution_count": 284,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-07-14 17:34:44.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mtotal annotations: 2400\u001b[0m\n",
"\u001b[32m2024-07-14 17:34:44.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m16\u001b[0m - \u001b[1mtotal annotations: 2400\u001b[0m\n"
]
}
],
"source": [
"first_annotator = {}\n",
"for annotation_record in raw_annotations:\n",
" page = None\n",
" annotation = []\n",
" for info in annotation_record:\n",
" if 'page' in info.keys():\n",
" page = str(info['page'])\n",
" elif 'type' in info.keys() and 'osm_id' in info.keys():\n",
" annotation.append(f\"{info['type']}:{info['osm_id']}\")\n",
" else:\n",
" logger.error(f'structural issues: {info}')\n",
"\n",
" if page is not None:\n",
" first_annotator[page] = annotation\n",
"\n",
"logger.info(f'total annotations: {len(first_annotator)}')"
]
},
{
"cell_type": "code",
"execution_count": 285,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['way:69366108', 'way:69366081']"
]
},
"execution_count": 285,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"first_annotator['6']"
]
},
{
"cell_type": "code",
"execution_count": 286,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['way:69366108', 'way:69366081']"
]
},
"execution_count": 286,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"second_annotator['6']"
]
},
{
"cell_type": "code",
"execution_count": 287,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['way:69366081', 'way:69366108']"
]
},
"execution_count": 287,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(set(second_annotator['6']).intersection(first_annotator['6']))"
]
},
{
"cell_type": "code",
"execution_count": 293,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-07-14 17:37:42.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mnot annotated by first annotator: 20\u001b[0m\n",
"\u001b[32m2024-07-14 17:37:42.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mnot annotated by first annotator: 20\u001b[0m\n"
]
}
],
"source": [
"matched = []\n",
"not_matched_from_first = []\n",
"not_matched_dict_first = {}\n",
"not_matched_from_second = []\n",
"not_matched_dict_second = {}\n",
"\n",
"status_list = []\n",
"\n",
"# agreed_first_and_second =[]\n",
"first_found = []\n",
"second_found = []\n",
"total = []\n",
"for poi_id, values in second_annotator.items():\n",
" if poi_id not in first_annotator.keys():\n",
" logger.info(f'not annotated by first annotator: {poi_id}')\n",
" continue\n",
" total.append(poi_id)\n",
" first_values = first_annotator[poi_id]\n",
" if len(values) > 0:\n",
" second_found.append(poi_id)\n",
" if len(first_values) > 0:\n",
" first_found.append(poi_id)\n",
" shared_osm_ids = list(set(second_annotator[poi_id]).intersection(first_annotator[poi_id]))\n",
" matched.extend(shared_osm_ids)\n",
" not_matched_from_first.extend([f for f in first_values if f not in shared_osm_ids and 'flag' not in f])\n",
" not_matched_dict_first[poi_id] = [f for f in first_values if f not in shared_osm_ids and 'flag' not in f]\n",
" not_matched_from_second.extend([s for s in values if s not in shared_osm_ids])\n",
" not_matched_dict_second[poi_id] = [s for s in values if s not in shared_osm_ids]\n",
" \n",
" if len(shared_osm_ids) == max(len(second_annotator[poi_id]), len(first_annotator[poi_id])):\n",
" status_list.append('FULLY_MATCHED')\n",
" elif len(shared_osm_ids) > 0:\n",
" status_list.append('PARTIALLY_MATCHED')\n",
" else:\n",
" status_list.append('NOT_MATCHED')"
]
},
{
"cell_type": "code",
"execution_count": 289,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-07-14 17:34:46.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mtotal overlap: 497 - first founds: 308 - second founds: 291 - shared: 279\u001b[0m\n",
"\u001b[32m2024-07-14 17:34:46.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mtotal overlap: 497 - first founds: 308 - second founds: 291 - shared: 279\u001b[0m\n"
]
}
],
"source": [
"shared_founds = list(set(second_found).intersection(first_found))\n",
"logger.info(f'total overlap: {len(total)} - first founds: {len(first_found)} - second founds: {len(second_found)} - shared: {len(shared_founds)}')"
]
},
{
"cell_type": "code",
"execution_count": 290,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-07-14 17:34:47.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mmatched: 345 - not matched from first: 69 - not matched from second: 42\u001b[0m\n",
"\u001b[32m2024-07-14 17:34:47.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mmatched: 345 - not matched from first: 69 - not matched from second: 42\u001b[0m\n"
]
}
],
"source": [
"logger.info(f'matched: {len(matched)} - not matched from first: {len(not_matched_from_first)} - not matched from second: {len(not_matched_from_second)}')"
]
},
{
"cell_type": "code",
"execution_count": 291,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['5',\n",
" '37',\n",
" '47',\n",
" '54',\n",
" '79',\n",
" '82',\n",
" '105',\n",
" '111',\n",
" '133',\n",
" '136',\n",
" '154',\n",
" '164',\n",
" '185',\n",
" '200',\n",
" '207',\n",
" '213',\n",
" '219',\n",
" '223',\n",
" '225',\n",
" '230',\n",
" '238',\n",
" '242',\n",
" '250',\n",
" '259',\n",
" '271',\n",
" '308',\n",
" '313',\n",
" '459',\n",
" '460']"
]
},
"execution_count": 291,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[f for f in first_found if f not in second_found]"
]
},
{
"cell_type": "code",
"execution_count": 294,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Counter({'FULLY_MATCHED': 407, 'PARTIALLY_MATCHED': 31, 'NOT_MATCHED': 59})"
]
},
"execution_count": 294,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from collections import Counter\n",
"Counter(status_list)"
]
},
{
"cell_type": "code",
"execution_count": 296,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"81.89134808853119"
]
},
"execution_count": 296,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"407/len(total)*100"
]
},
{
"cell_type": "code",
"execution_count": 297,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"88.12877263581488"
]
},
"execution_count": 297,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(407+31)/len(total)*100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
