{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example - Mapping Grid Data to Vector Data\n", "\n", "This is useful in the case where you want to get statistics for a specific raster\n", "over a certain region. In this example, the vector data is a random region\n", "created using SSURGO data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import json\n", "\n", "import geopandas\n", "\n", "from geocube.api.core import make_geocube\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "ssurgo_data = geopandas.read_file(\"../../test/test_data/input/soil_data_group.geojson\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cokeymukeydrclassdcdhzdept_rchkeyhzdepb_rclaytotal_rsandtotal_rsilttotal_rgeometry
012577452271425Somewhat poorly drained0.01000340905.023.0676759.97833866.953987MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ...
112577452271425Somewhat poorly drained5.010003409015.023.0676759.97833866.953987MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ...
212577452271425Somewhat poorly drained15.010003409130.023.0676759.97833866.953987MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ...
312577452271425Somewhat poorly drained30.010003409245.023.0676759.97833866.953987MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ...
412577452271425Somewhat poorly drained45.010003409360.023.2316439.96194166.806416MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ...
\n", "
" ], "text/plain": [ " cokey mukey drclassdcd hzdept_r chkey hzdepb_r \\\n", "0 12577452 271425 Somewhat poorly drained 0.0 100034090 5.0 \n", "1 12577452 271425 Somewhat poorly drained 5.0 100034090 15.0 \n", "2 12577452 271425 Somewhat poorly drained 15.0 100034091 30.0 \n", "3 12577452 271425 Somewhat poorly drained 30.0 100034092 45.0 \n", "4 12577452 271425 Somewhat poorly drained 45.0 100034093 60.0 \n", "\n", " claytotal_r sandtotal_r silttotal_r \\\n", "0 23.067675 9.978338 66.953987 \n", "1 23.067675 9.978338 66.953987 \n", "2 23.067675 9.978338 66.953987 \n", "3 23.067675 9.978338 66.953987 \n", "4 23.231643 9.961941 66.806416 \n", "\n", " geometry \n", "0 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n", "1 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n", "2 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n", "3 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n", "4 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ssurgo_data.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# convert the key used to group the vector data to an integer, as that is one of the\n", "# best data types for this type of mapping. If your data is not an integer,\n", "# then consider using a mapping of your data to an integer with something\n", "# like a categorical dtype.\n", "ssurgo_data[\"mukey\"] = ssurgo_data.mukey.astype(int)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert data to grid\n", "\n", "See docs for [make_geocube](../geocube.rst#make-geocube)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", "Dimensions: (hzdept_r: 11, x: 165, y: 165)\n", "Coordinates:\n", " * y (y) float64 41.5 41.5 41.5 41.5 ... 41.48 41.48 41.48 41.48\n", " * x (x) float64 -90.6 -90.6 -90.6 -90.6 ... 
-90.58 -90.58 -90.58\n", " * hzdept_r (hzdept_r) float64 0.0 5.0 15.0 30.0 ... 90.0 105.0 120.0 150.0\n", " spatial_ref int64 0\n", "Data variables:\n", " mukey (hzdept_r, y, x) float64 1.988e+05 1.988e+05 ... 1.987e+05\n", " hzdepb_r (hzdept_r, y, x) float64 5.0 5.0 5.0 5.0 ... 180.0 180.0 180.0\n", " claytotal_r (hzdept_r, y, x) float64 26.0 26.0 26.0 26.0 ... 21.0 21.0 21.0\n", " sandtotal_r (hzdept_r, y, x) float64 38.0 38.0 38.0 38.0 ... 10.0 10.0 10.0\n", " silttotal_r (hzdept_r, y, x) float64 36.0 36.0 36.0 36.0 ... 69.0 69.0 69.0\n", "Attributes:\n", " grid_mapping: spatial_ref" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out_grid = make_geocube(\n", " vector_data=ssurgo_data,\n", " group_by='hzdept_r',\n", " resolution=(-0.0001, 0.0001)\n", ")\n", "out_grid" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Get the mean/median of each region using the unique ID" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
hzdept_rspatial_refhzdepb_rclaytotal_rsandtotal_rsilttotal_r
mukey
198692.015.0030.023.0000007.00000070.000000
198714.015.0030.05.00000087.0000008.000000
198724.015.0030.021.00000010.00000069.000000
198750.015.0030.012.00000063.00000025.000000
198754.015.0030.026.00000038.00000036.000000
271425.015.0030.023.0676759.97833866.953987
271431.015.0030.014.00000055.00000031.000000
\n", "
" ], "text/plain": [ " hzdept_r spatial_ref hzdepb_r claytotal_r sandtotal_r \\\n", "mukey \n", "198692.0 15.0 0 30.0 23.000000 7.000000 \n", "198714.0 15.0 0 30.0 5.000000 87.000000 \n", "198724.0 15.0 0 30.0 21.000000 10.000000 \n", "198750.0 15.0 0 30.0 12.000000 63.000000 \n", "198754.0 15.0 0 30.0 26.000000 38.000000 \n", "271425.0 15.0 0 30.0 23.067675 9.978338 \n", "271431.0 15.0 0 30.0 14.000000 55.000000 \n", "\n", " silttotal_r \n", "mukey \n", "198692.0 70.000000 \n", "198714.0 8.000000 \n", "198724.0 69.000000 \n", "198750.0 25.000000 \n", "198754.0 36.000000 \n", "271425.0 66.953987 \n", "271431.0 31.000000 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_mean = out_grid.sel(hzdept_r=15).groupby(out_grid.mukey.sel(hzdept_r=15)).mean()\n", "grid_mean.to_dataframe()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
hzdept_rspatial_refhzdepb_rclaytotal_rsandtotal_rsilttotal_r
mukey
198692.075.0090.023.0000007.00000070.000000
198714.075.0090.07.80000086.4666675.733333
198724.075.0090.021.00000010.00000069.000000
198750.075.0090.012.00000063.00000025.000000
198754.075.0090.026.00000038.00000036.000000
271425.075.0090.024.56496610.12049765.314537
271431.075.0090.08.33333374.66666717.000000
\n", "
" ], "text/plain": [ " hzdept_r spatial_ref hzdepb_r claytotal_r sandtotal_r \\\n", "mukey \n", "198692.0 75.0 0 90.0 23.000000 7.000000 \n", "198714.0 75.0 0 90.0 7.800000 86.466667 \n", "198724.0 75.0 0 90.0 21.000000 10.000000 \n", "198750.0 75.0 0 90.0 12.000000 63.000000 \n", "198754.0 75.0 0 90.0 26.000000 38.000000 \n", "271425.0 75.0 0 90.0 24.564966 10.120497 \n", "271431.0 75.0 0 90.0 8.333333 74.666667 \n", "\n", " silttotal_r \n", "mukey \n", "198692.0 70.000000 \n", "198714.0 5.733333 \n", "198724.0 69.000000 \n", "198750.0 25.000000 \n", "198754.0 36.000000 \n", "271425.0 65.314537 \n", "271431.0 17.000000 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_median = out_grid.sel(hzdept_r=75).groupby(out_grid.mukey.sel(hzdept_r=75)).median()\n", "grid_median.to_dataframe()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.6 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "vscode": { "interpreter": { "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" } } }, "nbformat": 4, "nbformat_minor": 4 }