Tree-Grass ecosystem (Spain, Majadas de Tietar)#
This example demonstrates how to load and visualize datasets related to evapotranspiration (ETa) and land cover classification (CLC) over the Majadas region in Spain. The datasets are loaded using Pooch for remote file management. Two visualizations are created: one showing land cover classification (CLC) and another displaying the time series of evapotranspiration (ETa).
import pooch
import xarray as xr
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
from pathlib import Path
from centum.delineation import ETAnalysis
import numpy as np
from centum import plotting as pltC
from centum import utils
import geopandas as gpd
rootPathData= Path('/home/ben/Nextcloud/BenCSIC/Codes/tech4agro/test_Majadas_centum_dataset/')
Step 1: Download datasets using Pooch Pooch is used to manage the dataset downloads from the specified GitHub repository.
# pooch_Majadas = pooch.create(
# path=pooch.os_cache("Majadas_project"),
# base_url="https://github.com/BenjMy/test_Majadas_centum_dataset/raw/refs/heads/main/",
# registry={
# "20200403_LEVEL2_ECMWF_TPday.tif": None,
# "ETa_Majadas.netcdf": None,
# "ETp_Majadas.netcdf": None,
# "CLCover_Majadas.netcdf": None,
# },
# )
# pooch_Majadas = pooch.create(
# path=pooch.os_cache("Majadas_project"),
# base_url="https://github.com/BenjMy/test_Majadas_centum_dataset/raw/refs/heads/main/",
# registry={
# "ETa_Majadas.netcdf": None,
# "ETp_Majadas.netcdf": None,
# "CLC_Majadas_clipped.shp": None,
# "CLC_Majadas_clipped.shx": None,
# "CLC_Majadas_clipped.dbf": None,
# },
# )
# Majadas_ETa_dataset = pooch_Majadas.fetch('ETa_Majadas.netcdf')
# Majadas_ETp_dataset = pooch_Majadas.fetch('ETp_Majadas.netcdf')
# Majadas_CLC_dataset = pooch_Majadas.fetch('CLCover_Majadas.netcdf')
print('loading datasets')
Majadas_ETa_dataset = rootPathData/'ETa_Majadas_H2Bassin.netcdf'
Majadas_ETp_dataset = rootPathData/'ETp_Majadas_H2Bassin.netcdf'
Majadas_CLC_dataset = rootPathData/'CLCover_Majadas.netcdf'
Majadas_rain_dataset = rootPathData/'RAIN_Majadas_H2Bassin.netcdf'
Majadas_CLC_gdf = gpd.read_file(rootPathData/'BassinH2_Majadas_corrected.shp')
ETa_ds = xr.load_dataset(Majadas_ETa_dataset)
ETa_ds = ETa_ds.rename({"__xarray_dataarray_variable__": "ETa"}) # Rename the main variable to 'ETa'
ETp_ds = xr.load_dataset(Majadas_ETp_dataset)
ETp_ds = ETp_ds.rename({"__xarray_dataarray_variable__": "ETp"}) # Rename the main variable to 'ETa'
CLC = xr.load_dataset(Majadas_CLC_dataset) # Load the CLC dataset
loading datasets
---------------------------------------------------------------------------
DataSourceError Traceback (most recent call last)
Cell In[2], line 35
32 Majadas_CLC_dataset = rootPathData/'CLCover_Majadas.netcdf'
33 Majadas_rain_dataset = rootPathData/'RAIN_Majadas_H2Bassin.netcdf'
---> 35 Majadas_CLC_gdf = gpd.read_file(rootPathData/'BassinH2_Majadas_corrected.shp')
37 ETa_ds = xr.load_dataset(Majadas_ETa_dataset)
38 ETa_ds = ETa_ds.rename({"__xarray_dataarray_variable__": "ETa"}) # Rename the main variable to 'ETa'
File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/geopandas/io/file.py:294, in _read_file(filename, bbox, mask, columns, rows, engine, **kwargs)
291 from_bytes = True
293 if engine == "pyogrio":
--> 294 return _read_file_pyogrio(
295 filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
296 )
298 elif engine == "fiona":
299 if pd.api.types.is_file_like(filename):
File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/geopandas/io/file.py:547, in _read_file_pyogrio(path_or_bytes, bbox, mask, rows, **kwargs)
538 warnings.warn(
539 "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
540 "will be removed in a future release. You can use the 'columns' keyword "
(...)
543 stacklevel=3,
544 )
545 kwargs["columns"] = kwargs.pop("include_fields")
--> 547 return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/pyogrio/geopandas.py:265, in read_dataframe(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)
260 if not use_arrow:
261 # For arrow, datetimes are read as is.
262 # For numpy IO, datetimes are read as string values to preserve timezone info
263 # as numpy does not directly support timezones.
264 kwargs["datetime_as_string"] = True
--> 265 result = read_func(
266 path_or_buffer,
267 layer=layer,
268 encoding=encoding,
269 columns=columns,
270 read_geometry=read_geometry,
271 force_2d=gdal_force_2d,
272 skip_features=skip_features,
273 max_features=max_features,
274 where=where,
275 bbox=bbox,
276 mask=mask,
277 fids=fids,
278 sql=sql,
279 sql_dialect=sql_dialect,
280 return_fids=fid_as_index,
281 **kwargs,
282 )
284 if use_arrow:
285 meta, table = result
File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/pyogrio/raw.py:198, in read(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)
59 """Read OGR data source into numpy arrays.
60
61 IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
(...)
194
195 """
196 dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
--> 198 return ogr_read(
199 get_vsi_path_or_buffer(path_or_buffer),
200 layer=layer,
201 encoding=encoding,
202 columns=columns,
203 read_geometry=read_geometry,
204 force_2d=force_2d,
205 skip_features=skip_features,
206 max_features=max_features or 0,
207 where=where,
208 bbox=bbox,
209 mask=_mask_to_wkb(mask),
210 fids=fids,
211 sql=sql,
212 sql_dialect=sql_dialect,
213 return_fids=return_fids,
214 dataset_kwargs=dataset_kwargs,
215 datetime_as_string=datetime_as_string,
216 )
File pyogrio/_io.pyx:1240, in pyogrio._io.ogr_read()
File pyogrio/_io.pyx:220, in pyogrio._io.ogr_open()
DataSourceError: /home/ben/Nextcloud/BenCSIC/Codes/tech4agro/test_Majadas_centum_dataset/BassinH2_Majadas_corrected.shp: No such file or directory
Step 2: Corine Land Cover Visualization
x_coords_CLC = CLC['Code_CLC'].coords['x'].values
y_coords_CLC = CLC['Code_CLC'].coords['y'].values
fig, ax = plt.subplots()
im1 = ax.imshow(CLC['Code_CLC'].values, cmap='viridis', aspect='auto', origin='upper',
extent=[x_coords_CLC.min(), x_coords_CLC.max(), y_coords_CLC.min(), y_coords_CLC.max()])
ax.set_title('CLC Code')
ax.set_xlabel('X Coordinate')
ax.set_ylabel('Y Coordinate')
ax.axis('square') #
plt.tight_layout()
plt.show()
Step 3: Create an animated visualization of the ETa time series
ETa_ds_selec = ETa_ds.isel(time=slice(0, 25))
x_coords = ETa_ds_selec['ETa'].coords['x'].values
y_coords = ETa_ds_selec['ETa'].coords['y'].values
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(ETa_ds_selec['ETa'].isel(band=0).isel(time=0).values, cmap='coolwarm', origin='upper',
extent=[x_coords.min(), x_coords.max(), y_coords.min(), y_coords.max()])
ax.set_title('ETa Time Series') # Title for the time series plot
ax.set_xlabel('X Coordinate') # Label for the X axis
ax.set_ylabel('Y Coordinate') # Label for the Y axis
ax.axis('square') # Make the axis square for proper aspect ratio
cbar = fig.colorbar(im, ax=ax, orientation='vertical', extend='both', label='ETa')
def update(frame):
im.set_data(ETa_ds_selec['ETa'].isel(band=0).isel(time=frame).values)
ax.set_title(f'ETa Time Step: {frame}')
return [im]
ani = FuncAnimation(fig, update, frames=len(ETa_ds_selec['time']), interval=200, blit=True)
HTML(ani.to_jshtml()) # Show the animation in the notebook
print('running delineation')
threshold_local = 0.25
threshold_regional = 0.25
time_window = 1
irr_analysis_usingET = ETAnalysis()
ET_analysis_ds = ETa_ds.isel(band=0)
ET_analysis_ds = ET_analysis_ds.sortby("time")
ET_analysis_ds['ETp'] = ETp_ds['ETp'].isel(band=0)
window_size_x = (ET_analysis_ds.x.max() - ET_analysis_ds.x.min())/10
# Run the irrigation delineation
decision_ds, event_type = irr_analysis_usingET.irrigation_delineation(
ET_analysis_ds,
threshold_local=threshold_local,
threshold_regional=threshold_regional,
time_window=time_window,
window_size_x=window_size_x,
)
# event_type
print('plotting mask')
from pyproj import CRS
crs = CRS.from_wkt('PROJCS["unknown",GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Azimuthal_Equidistant"],PARAMETER["latitude_of_center",53],PARAMETER["longitude_of_center",24],PARAMETER["false_easting",5837287.81977],PARAMETER["false_northing",2121415.69617],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]')
CLC = CLC.rio.write_crs(crs.to_wkt())
ETa_ds = ETa_ds.rio.write_crs(crs.to_wkt())
CLC_reproj = CLC.rio.reproject_match(ETa_ds)
Majadas_CLC_gdf_reproj = Majadas_CLC_gdf.to_crs(crs.to_wkt())
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
CLC_reproj['Code_18'].plot.imshow(ax=ax, cmap='viridis')
ax.set_title("Majadas de Tietar Corine Land Cover")
plt.show()
mask_CLC_IrrLand = CLC_reproj['Code_CLC']==212
mask_CLC_Agroforestry = CLC_reproj['Code_CLC']==244
np.sum(mask_CLC_IrrLand)
np.sum(mask_CLC_Agroforestry)
clc_codes = utils.get_CLC_code_def()
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
mask_CLC_IrrLand.plot.imshow(ax=ax, cmap='viridis')
ax.set_title("Majadas de Tietar Corine Land Cover")
plt.show()
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
mask_CLC_Agroforestry.plot.imshow(ax=ax, cmap='viridis')
ax.set_title("Majadas de Tietar Corine Land Cover")
plt.show()
print('plotting time serie')
start_date = "2023-08-01"
end_date = "2023-08-15" # Changed to cover the entire summer
# decision_ds_filtered = decision_ds.sel(time=slice(start_date,end_date))
decision_ds_filtered = decision_ds
decision_ds_IrrLand = (
decision_ds_filtered
.where(mask_CLC_IrrLand)
.mean(['x', 'y'])
)
decision_ds_Agroforestry = (
decision_ds_filtered
.where(mask_CLC_Agroforestry)
.mean(['x', 'y'])
)
decision_ds_IrrLand.data_vars
fig, axs = plt.subplots(2,1)
decision_ds_IrrLand['ETa'].plot(x='time',label='ETa',ax=axs[0])
decision_ds_IrrLand['ratio_ETap_regional_spatial_avg_time_avg'].plot(x='time',
label='ratio_ETap_regional',
ax=axs[1])
decision_ds_IrrLand['ratio_ETap_local'].plot(x='time',
label='ratio_ETap_local',
ax=axs[1])
# decision_ds_IrrLand['condRain'].plot(x='time',
# label='condRain',
# ax=axs[1])
plt.legend()
def plot_decision_schedule(event_type,time_steps,fig,axes):
axes = axes.flatten() # Flatten to easily iterate over
cmap = plt.cm.colors.ListedColormap([
# 'white',
'green',
'red'
])
x_values = event_type['x'].values
y_values = event_type['y'].values
extent = [x_values.min(), x_values.max(), y_values.min(), y_values.max()]
for i, ax in enumerate(axes):
if i < time_steps: # Only plot if there is corresponding data
data = event_type.isel(time=i).values # or event_type.sel(time=...) if using labels
img = ax.imshow(data,
cmap=cmap,
vmin=0,
vmax=1,
extent=extent,
origin='lower'
)
ax.set_xlabel('x') # Label for the x-axis
ax.set_ylabel('y') # Label for the y-axis
else:
ax.axis('off') # Turn off empty subplots
# Adjust layout to prevent overlap
plt.tight_layout()
cbar = fig.colorbar(plt.cm.ScalarMappable(cmap=cmap,
norm=plt.Normalize(vmin=0,
vmax=1)
),
ax=axes,
orientation='horizontal',
fraction=0.02, pad=0.04) # Adjust placement
cbar.set_ticks([0, 1])
cbar.set_ticklabels(['True', 'False'])
pass
print('plotting decision rain')
decision_ds_filtered = decision_ds.sel(time=slice(start_date,end_date))
time_steps = decision_ds_filtered.time.size
ncols = 4
nrows = int(np.ceil(time_steps / ncols)) # Number of rows needed
fig, axes = plt.subplots(nrows=nrows, ncols=ncols,
figsize=(15, nrows * 3)
)
plot_decision_schedule(decision_ds_filtered['condRain'],
time_steps,
fig,axes)
for axi in axes.flatten():
Majadas_CLC_gdf_reproj.plot(ax=axi,
edgecolor="black",
facecolor="none",
# alpha=1,
)
# Majadas_CLC_gdf_reproj.plot()
event_type_sorted = event_type.sortby("time")
event_type_filtered = event_type_sorted.sel(time=slice(start_date,end_date))
event_type_IrrLand = (
event_type_filtered
.where(mask_CLC_IrrLand)
# .mean(['x', 'y'])
)
event_type_Agroforestry = (
event_type_filtered
.where(mask_CLC_Agroforestry)
# .mean(['x', 'y'])
)
np.sum(event_type)
np.sum(event_type_Agroforestry)
ncols = 4
time_steps = event_type_IrrLand.time.size
nrows = int(np.ceil(time_steps / ncols)) # Number of rows needed
fig, axes = plt.subplots(nrows=nrows, ncols=ncols,
figsize=(15, nrows * 3)
)
pltC.plot_irrigation_schedule(event_type_IrrLand,
time_steps,
fig,
axes
)
fig, axes = plt.subplots(nrows=nrows, ncols=ncols,
figsize=(15, nrows * 3)
)
pltC.plot_irrigation_schedule(event_type_Agroforestry,
time_steps,
fig,
axes
)
# event_type_node_IN = (
# event_type_filtered
# .where(mask_CLCselected_land_covers{lci+1}"])
# .mean(['x', 'y'])
# )