{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Example - Mapping Grid Data to Vector Data\n",
"\n",
"This is useful when you want to compute statistics for a specific raster\n",
"over a certain region. In this example, the vector data is SSURGO data\n",
"for a random region."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import geopandas\n",
"\n",
"from geocube.api.core import make_geocube\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"ssurgo_data = geopandas.read_file(\"../../test/test_data/input/soil_data_group.geojson\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cokey | \n",
" mukey | \n",
" drclassdcd | \n",
" hzdept_r | \n",
" chkey | \n",
" hzdepb_r | \n",
" claytotal_r | \n",
" sandtotal_r | \n",
" silttotal_r | \n",
" geometry | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 12577452 | \n",
" 271425 | \n",
" Somewhat poorly drained | \n",
" 0.0 | \n",
" 100034090 | \n",
" 5.0 | \n",
" 23.067675 | \n",
" 9.978338 | \n",
" 66.953987 | \n",
" MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... | \n",
"
\n",
" \n",
" 1 | \n",
" 12577452 | \n",
" 271425 | \n",
" Somewhat poorly drained | \n",
" 5.0 | \n",
" 100034090 | \n",
" 15.0 | \n",
" 23.067675 | \n",
" 9.978338 | \n",
" 66.953987 | \n",
" MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... | \n",
"
\n",
" \n",
" 2 | \n",
" 12577452 | \n",
" 271425 | \n",
" Somewhat poorly drained | \n",
" 15.0 | \n",
" 100034091 | \n",
" 30.0 | \n",
" 23.067675 | \n",
" 9.978338 | \n",
" 66.953987 | \n",
" MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... | \n",
"
\n",
" \n",
" 3 | \n",
" 12577452 | \n",
" 271425 | \n",
" Somewhat poorly drained | \n",
" 30.0 | \n",
" 100034092 | \n",
" 45.0 | \n",
" 23.067675 | \n",
" 9.978338 | \n",
" 66.953987 | \n",
" MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... | \n",
"
\n",
" \n",
" 4 | \n",
" 12577452 | \n",
" 271425 | \n",
" Somewhat poorly drained | \n",
" 45.0 | \n",
" 100034093 | \n",
" 60.0 | \n",
" 23.231643 | \n",
" 9.961941 | \n",
" 66.806416 | \n",
" MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cokey mukey drclassdcd hzdept_r chkey hzdepb_r \\\n",
"0 12577452 271425 Somewhat poorly drained 0.0 100034090 5.0 \n",
"1 12577452 271425 Somewhat poorly drained 5.0 100034090 15.0 \n",
"2 12577452 271425 Somewhat poorly drained 15.0 100034091 30.0 \n",
"3 12577452 271425 Somewhat poorly drained 30.0 100034092 45.0 \n",
"4 12577452 271425 Somewhat poorly drained 45.0 100034093 60.0 \n",
"\n",
" claytotal_r sandtotal_r silttotal_r \\\n",
"0 23.067675 9.978338 66.953987 \n",
"1 23.067675 9.978338 66.953987 \n",
"2 23.067675 9.978338 66.953987 \n",
"3 23.067675 9.978338 66.953987 \n",
"4 23.231643 9.961941 66.806416 \n",
"\n",
" geometry \n",
"0 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n",
"1 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n",
"2 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n",
"3 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... \n",
"4 MULTIPOLYGON (((-90.59735 41.49255, -90.59730 ... "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ssurgo_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Cast the key used for grouping the vector data to an integer, since integers\n",
"# are among the best data types for this kind of mapping. If your key is not\n",
"# an integer, consider mapping it to one first, e.g. with a categorical dtype.\n",
"ssurgo_data[\"mukey\"] = ssurgo_data[\"mukey\"].astype(int)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Convert data to grid\n",
"\n",
"See docs for [make_geocube](../geocube.rst#make-geocube)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
"Dimensions: (hzdept_r: 11, x: 165, y: 165)\n",
"Coordinates:\n",
" * y (y) float64 41.5 41.5 41.5 41.5 ... 41.48 41.48 41.48 41.48\n",
" * x (x) float64 -90.6 -90.6 -90.6 -90.6 ... -90.58 -90.58 -90.58\n",
" * hzdept_r (hzdept_r) float64 0.0 5.0 15.0 30.0 ... 90.0 105.0 120.0 150.0\n",
" spatial_ref int64 0\n",
"Data variables:\n",
" mukey (hzdept_r, y, x) float64 1.988e+05 1.988e+05 ... 1.987e+05\n",
" hzdepb_r (hzdept_r, y, x) float64 5.0 5.0 5.0 5.0 ... 180.0 180.0 180.0\n",
" claytotal_r (hzdept_r, y, x) float64 26.0 26.0 26.0 26.0 ... 21.0 21.0 21.0\n",
" sandtotal_r (hzdept_r, y, x) float64 38.0 38.0 38.0 38.0 ... 10.0 10.0 10.0\n",
" silttotal_r (hzdept_r, y, x) float64 36.0 36.0 36.0 36.0 ... 69.0 69.0 69.0\n",
"Attributes:\n",
" grid_mapping: spatial_ref"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rasterize the vector data; `group_by` adds an `hzdept_r` dimension so the\n",
"# result holds one (y, x) layer per unique `hzdept_r` value.\n",
"out_grid = make_geocube(\n",
"    vector_data=ssurgo_data,\n",
"    group_by=\"hzdept_r\",\n",
"    resolution=(-0.0001, 0.0001),\n",
")\n",
"out_grid"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get the mean/median of each region using the unique ID"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" hzdept_r | \n",
" spatial_ref | \n",
" hzdepb_r | \n",
" claytotal_r | \n",
" sandtotal_r | \n",
" silttotal_r | \n",
"
\n",
" \n",
" mukey | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 198692.0 | \n",
" 15.0 | \n",
" 0 | \n",
" 30.0 | \n",
" 23.000000 | \n",
" 7.000000 | \n",
" 70.000000 | \n",
"
\n",
" \n",
" 198714.0 | \n",
" 15.0 | \n",
" 0 | \n",
" 30.0 | \n",
" 5.000000 | \n",
" 87.000000 | \n",
" 8.000000 | \n",
"
\n",
" \n",
" 198724.0 | \n",
" 15.0 | \n",
" 0 | \n",
" 30.0 | \n",
" 21.000000 | \n",
" 10.000000 | \n",
" 69.000000 | \n",
"
\n",
" \n",
" 198750.0 | \n",
" 15.0 | \n",
" 0 | \n",
" 30.0 | \n",
" 12.000000 | \n",
" 63.000000 | \n",
" 25.000000 | \n",
"
\n",
" \n",
" 198754.0 | \n",
" 15.0 | \n",
" 0 | \n",
" 30.0 | \n",
" 26.000000 | \n",
" 38.000000 | \n",
" 36.000000 | \n",
"
\n",
" \n",
" 271425.0 | \n",
" 15.0 | \n",
" 0 | \n",
" 30.0 | \n",
" 23.067675 | \n",
" 9.978338 | \n",
" 66.953987 | \n",
"
\n",
" \n",
" 271431.0 | \n",
" 15.0 | \n",
" 0 | \n",
" 30.0 | \n",
" 14.000000 | \n",
" 55.000000 | \n",
" 31.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" hzdept_r spatial_ref hzdepb_r claytotal_r sandtotal_r \\\n",
"mukey \n",
"198692.0 15.0 0 30.0 23.000000 7.000000 \n",
"198714.0 15.0 0 30.0 5.000000 87.000000 \n",
"198724.0 15.0 0 30.0 21.000000 10.000000 \n",
"198750.0 15.0 0 30.0 12.000000 63.000000 \n",
"198754.0 15.0 0 30.0 26.000000 38.000000 \n",
"271425.0 15.0 0 30.0 23.067675 9.978338 \n",
"271431.0 15.0 0 30.0 14.000000 55.000000 \n",
"\n",
" silttotal_r \n",
"mukey \n",
"198692.0 70.000000 \n",
"198714.0 8.000000 \n",
"198724.0 69.000000 \n",
"198750.0 25.000000 \n",
"198754.0 36.000000 \n",
"271425.0 66.953987 \n",
"271431.0 31.000000 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select the hzdept_r=15 layer once, then average every variable per mukey.\n",
"layer_15 = out_grid.sel(hzdept_r=15)\n",
"grid_mean = layer_15.groupby(layer_15.mukey).mean()\n",
"grid_mean.to_dataframe()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" hzdept_r | \n",
" spatial_ref | \n",
" hzdepb_r | \n",
" claytotal_r | \n",
" sandtotal_r | \n",
" silttotal_r | \n",
"
\n",
" \n",
" mukey | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 198692.0 | \n",
" 75.0 | \n",
" 0 | \n",
" 90.0 | \n",
" 23.000000 | \n",
" 7.000000 | \n",
" 70.000000 | \n",
"
\n",
" \n",
" 198714.0 | \n",
" 75.0 | \n",
" 0 | \n",
" 90.0 | \n",
" 7.800000 | \n",
" 86.466667 | \n",
" 5.733333 | \n",
"
\n",
" \n",
" 198724.0 | \n",
" 75.0 | \n",
" 0 | \n",
" 90.0 | \n",
" 21.000000 | \n",
" 10.000000 | \n",
" 69.000000 | \n",
"
\n",
" \n",
" 198750.0 | \n",
" 75.0 | \n",
" 0 | \n",
" 90.0 | \n",
" 12.000000 | \n",
" 63.000000 | \n",
" 25.000000 | \n",
"
\n",
" \n",
" 198754.0 | \n",
" 75.0 | \n",
" 0 | \n",
" 90.0 | \n",
" 26.000000 | \n",
" 38.000000 | \n",
" 36.000000 | \n",
"
\n",
" \n",
" 271425.0 | \n",
" 75.0 | \n",
" 0 | \n",
" 90.0 | \n",
" 24.564966 | \n",
" 10.120497 | \n",
" 65.314537 | \n",
"
\n",
" \n",
" 271431.0 | \n",
" 75.0 | \n",
" 0 | \n",
" 90.0 | \n",
" 8.333333 | \n",
" 74.666667 | \n",
" 17.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" hzdept_r spatial_ref hzdepb_r claytotal_r sandtotal_r \\\n",
"mukey \n",
"198692.0 75.0 0 90.0 23.000000 7.000000 \n",
"198714.0 75.0 0 90.0 7.800000 86.466667 \n",
"198724.0 75.0 0 90.0 21.000000 10.000000 \n",
"198750.0 75.0 0 90.0 12.000000 63.000000 \n",
"198754.0 75.0 0 90.0 26.000000 38.000000 \n",
"271425.0 75.0 0 90.0 24.564966 10.120497 \n",
"271431.0 75.0 0 90.0 8.333333 74.666667 \n",
"\n",
" silttotal_r \n",
"mukey \n",
"198692.0 70.000000 \n",
"198714.0 5.733333 \n",
"198724.0 69.000000 \n",
"198750.0 25.000000 \n",
"198754.0 36.000000 \n",
"271425.0 65.314537 \n",
"271431.0 17.000000 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select the hzdept_r=75 layer once, then take the per-mukey median of every variable.\n",
"layer_75 = out_grid.sel(hzdept_r=75)\n",
"grid_median = layer_75.groupby(layer_75.mukey).median()\n",
"grid_median.to_dataframe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"vscode": {
"interpreter": {
"hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}