{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "ynvqgqCbIRqi" }, "source": [ "# Convert GPS-tracked bus movement data from JSON to GeoJSON format\n", "\n", "*Written by Men Vuthy, 2022*\n", "\n", "---\n" ] }, { "cell_type": "markdown", "metadata": { "id": "WnuybrH7IrDx" }, "source": [ "### Objective\n", "\n", "\n", "\n", "* The objective is to convert the JSON file of GPS data which recorded the movement of city bus in Phnom Penh to GeoJSON file format for analysis in GIS.\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "File content before conversion:\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": { "id": "mZNKQ7gZI4uT" }, "source": [ "### Environment" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "cZR4GFo63xLd" }, "outputs": [], "source": [ "# %pip (rather than !pip) installs into the environment of the running kernel\n", "%pip install geopandas\n", "%pip install contextily\n", "%pip install mapclassify" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bd9PuIRE39A7", "outputId": "daaa35c1-6639-4cc0-ca38-b730b4454b03" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/content/drive/MyDrive/Colab Notebooks/Bus\n" ] } ], "source": [ "%cd /content/drive/MyDrive/Colab Notebooks/Bus" ] }, { "cell_type": "markdown", "metadata": { "id": "mf_iGanyJo8P" }, "source": [ "### Code" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "Xd5PMe0P4Mw5" }, "outputs": [], "source": [ "# Import necessary modules\n", "import json\n", "import geopandas as gpd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "hm804nJ49eVq" }, "outputs": [], "source": [ "# Read the raw GPS records from the JSON file.\n", "# The context manager closes the file handle as soon as parsing is done.\n", "with open(\"data.json\", \"r\", encoding=\"utf-8\") as raw_json:\n", "    input_file = json.load(raw_json)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jUBmE_iSMveF", "outputId": "79a99db0-b4c1-4f59-b6ee-a5060aea9755" }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "The length of data is 13010\n", "The variables of first data:\n" ] }, { "data": { "text/plain": [ "{'_id': {'$oid': '625ce66611021109ba081525'},\n", " 'device_id': '0358735074119172',\n", " 'date': '1604120c0a11',\n", " 'set_count': 'cf',\n", " 'latitude_raw': '013db852',\n", " 'longitude_raw': '0b41b0c0',\n", " 'latitude': 11.567832222222222,\n", " 'longitude': 104.91914666666666,\n", " 'speed': 15,\n", " 'orientation': 'd4ef',\n", " 'lbs': '01c808273a002b33',\n", " 'device_info': '00000010',\n", " 'power': '0f',\n", " 'gsm': 'a8',\n", " 'alert': '0d',\n", " 'power_status': '0',\n", " 'gps_status': '0',\n", " 'charge_status': '0',\n", " 'acc_status': '1',\n", " 'defence_status': '0',\n", " 'from_cmd': 'ping',\n", " 'location': {'type': 'Point',\n", " 'coordinates': [104.91914666666666, 11.567832222222222]},\n", " 'timespan': '2022-04-18 04:17:42',\n", " 'last_submit': '2022-04-18 04:17:42'}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect the loaded data before converting it\n", "\n", "# Number of records in the file\n", "print('The length of data is', len(input_file))\n", "\n", "# Fields of the first record (displayed as the cell's result)\n", "print('The variables of first data:')\n", "input_file[0]" ] }, { "cell_type": "markdown", "metadata": { "id": "BYtDch_vNUcV" }, "source": [ "As seen above, the variables inside data properties are not in GeoJSON format. Thus, we need to rearrange them to a proper GeoJSON property file format. \n", "\n", "The format can be referred to https://geojson.io/#map=2/20.0/0.0." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "E4XjZIEs9zcQ" }, "outputs": [], "source": [ "# Rearrange each GPS record into a GeoJSON Feature inside a FeatureCollection.\n", "# Property keys are listed explicitly, in the order they are written to the file;\n", "# indexing with record[key] raises KeyError if a record lacks one of them.\n", "property_keys = (\n", "    '_id', 'acc_status', 'alert', 'charge_status', 'date', 'defence_status',\n", "    'device_id', 'device_info', 'from_cmd', 'gps_status', 'gsm', 'last_submit',\n", "    'latitude', 'latitude_raw', 'lbs', 'location', 'longitude', 'longitude_raw',\n", "    'orientation', 'power', 'power_status', 'set_count', 'speed', 'timespan',\n", ")\n", "\n", "geojs = {\n", "    \"type\": \"FeatureCollection\",\n", "    \"features\": [\n", "        {\n", "            \"type\": \"Feature\",\n", "            \"properties\": {key: record[key] for key in property_keys},\n", "            \"geometry\": {\n", "                \"type\": \"Point\",\n", "                # Reuse the [longitude, latitude] pair already stored in `location`\n", "                \"coordinates\": record[\"location\"][\"coordinates\"],\n", "            },\n", "        }\n", "        for record in input_file\n", "    ],\n", "}\n", "\n", "# Save to a new file. The context manager flushes and closes the file,\n", "# so the complete GeoJSON is on disk before it is read back.\n", "with open(\"geodata.json\", \"w\", encoding=\"utf-8\") as output_file:\n", "    json.dump(geojs, output_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "File content after conversion:\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": { "id": "y6H-DrtbXwD8" }, "source": [ "Read newly-created GeoJSON file and visualize speed data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 473 }, "id": "CjnTnJqI5CcO", "outputId": "23822f79-bad0-4706-d73c-d43a2ab19c49" }, "outputs": [ { "data": { "text/html": [ "\n", "
| \n", " | _id | \n", "acc_status | \n", "alert | \n", "charge_status | \n", "date | \n", "defence_status | \n", "device_id | \n", "device_info | \n", "from_cmd | \n", "gps_status | \n", "... | \n", "location | \n", "longitude | \n", "longitude_raw | \n", "orientation | \n", "power | \n", "power_status | \n", "set_count | \n", "speed | \n", "timespan | \n", "geometry | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "{'$oid': '625ce66611021109ba081525'} | \n", "1 | \n", "0d | \n", "0 | \n", "1604120c0a11 | \n", "0 | \n", "0358735074119172 | \n", "00000010 | \n", "ping | \n", "0 | \n", "... | \n", "{'type': 'Point', 'coordinates': [104.91914666... | \n", "104.919147 | \n", "0b41b0c0 | \n", "d4ef | \n", "0f | \n", "0 | \n", "cf | \n", "15 | \n", "2022-04-18T04:17:42 | \n", "POINT (104.91915 11.56783) | \n", "
| 1 | \n", "{'$oid': '625ce6d511021178ef08168e'} | \n", "1 | \n", "0d | \n", "1 | \n", "1604120c0c2e | \n", "1 | \n", "0358735074119172 | \n", "00100111 | \n", "ping | \n", "0 | \n", "... | \n", "{'type': 'Point', 'coordinates': [104.91769777... | \n", "104.917698 | \n", "0b41a690 | \n", "d4e6 | \n", "be | \n", "0 | \n", "cf | \n", "22 | \n", "2022-04-18T04:19:33 | \n", "POINT (104.91770 11.56665) | \n", "
| 2 | \n", "{'$oid': '625ce6d611021175dd081693'} | \n", "1 | \n", "0d | \n", "0 | \n", "1604120c0e33 | \n", "1 | \n", "0358735074119172 | \n", "00101011 | \n", "ping | \n", "0 | \n", "... | \n", "{'type': 'Point', 'coordinates': [104.91549333... | \n", "104.915493 | \n", "0b419710 | \n", "d4e5 | \n", "fd | \n", "0 | \n", "cf | \n", "8 | \n", "2022-04-18T04:19:34 | \n", "POINT (104.91549 11.56489) | \n", "
| 3 | \n", "{'$oid': '625ce6d8110211c581081699'} | \n", "0 | \n", "0d | \n", "1 | \n", "1604120c0f2e | \n", "0 | \n", "0358735074119172 | \n", "00000100 | \n", "ping | \n", "0 | \n", "... | \n", "{'type': 'Point', 'coordinates': [104.91375111... | \n", "104.913751 | \n", "0b418ad0 | \n", "d4e6 | \n", "d8 | \n", "0 | \n", "cf | \n", "12 | \n", "2022-04-18T04:19:36 | \n", "POINT (104.91375 11.56349) | \n", "
| 4 | \n", "{'$oid': '625ce6d9110211599f08169c'} | \n", "0 | \n", "0d | \n", "0 | \n", "1604120c1124 | \n", "1 | \n", "0358735074119172 | \n", "00111001 | \n", "ping | \n", "0 | \n", "... | \n", "{'type': 'Point', 'coordinates': [104.91170666... | \n", "104.911707 | \n", "0b417c70 | \n", "d4d8 | \n", "42 | \n", "0 | \n", "cf | \n", "24 | \n", "2022-04-18T04:19:37 | \n", "POINT (104.91171 11.56169) | \n", "
5 rows × 25 columns
\n", "