Tree-Grass ecosystem (Spain, Majadas de Tietar)

Tree-Grass ecosystem (Spain, Majadas de Tietar)#

This example demonstrates how to load and visualize datasets related to evapotranspiration (ETa) and land cover classification (CLC) over the Majadas region in Spain. The datasets are loaded using Pooch for remote file management. Two visualizations are created: one showing land cover classification (CLC) and another displaying the time series of evapotranspiration (ETa).

import pooch
import xarray as xr 
import matplotlib.pyplot as plt 
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
from pathlib import Path
from centum.delineation import ETAnalysis
import numpy as np
from centum import plotting as pltC
from centum import utils 
import geopandas as gpd


rootPathData= Path('/home/ben/Nextcloud/BenCSIC/Codes/tech4agro/test_Majadas_centum_dataset/')

Step 1: Download datasets using Pooch Pooch is used to manage the dataset downloads from the specified GitHub repository.

# pooch_Majadas = pooch.create(
#     path=pooch.os_cache("Majadas_project"),
#     base_url="https://github.com/BenjMy/test_Majadas_centum_dataset/raw/refs/heads/main/",
#     registry={
#         "20200403_LEVEL2_ECMWF_TPday.tif": None,
#         "ETa_Majadas.netcdf": None,
#         "ETp_Majadas.netcdf": None,
#         "CLCover_Majadas.netcdf": None,
#     },
# )

# pooch_Majadas = pooch.create(
#     path=pooch.os_cache("Majadas_project"),
#     base_url="https://github.com/BenjMy/test_Majadas_centum_dataset/raw/refs/heads/main/",
#     registry={
#         "ETa_Majadas.netcdf": None,
#         "ETp_Majadas.netcdf": None,
#         "CLC_Majadas_clipped.shp": None,
#         "CLC_Majadas_clipped.shx": None,
#         "CLC_Majadas_clipped.dbf": None,
#     },
# )

# Majadas_ETa_dataset = pooch_Majadas.fetch('ETa_Majadas.netcdf')
# Majadas_ETp_dataset = pooch_Majadas.fetch('ETp_Majadas.netcdf')
# Majadas_CLC_dataset = pooch_Majadas.fetch('CLCover_Majadas.netcdf')

print('loading datasets')

Majadas_ETa_dataset = rootPathData/'ETa_Majadas_H2Bassin.netcdf'
Majadas_ETp_dataset = rootPathData/'ETp_Majadas_H2Bassin.netcdf'
Majadas_CLC_dataset = rootPathData/'CLCover_Majadas.netcdf'
Majadas_rain_dataset = rootPathData/'RAIN_Majadas_H2Bassin.netcdf'

Majadas_CLC_gdf = gpd.read_file(rootPathData/'BassinH2_Majadas_corrected.shp')

ETa_ds = xr.load_dataset(Majadas_ETa_dataset)
ETa_ds = ETa_ds.rename({"__xarray_dataarray_variable__": "ETa"})  # Rename the main variable to 'ETa'

ETp_ds = xr.load_dataset(Majadas_ETp_dataset)
ETp_ds = ETp_ds.rename({"__xarray_dataarray_variable__": "ETp"})  # Rename the main variable to 'ETa'

CLC = xr.load_dataset(Majadas_CLC_dataset)  # Load the CLC dataset
loading datasets
---------------------------------------------------------------------------
DataSourceError                           Traceback (most recent call last)
Cell In[2], line 35
     32 Majadas_CLC_dataset = rootPathData/'CLCover_Majadas.netcdf'
     33 Majadas_rain_dataset = rootPathData/'RAIN_Majadas_H2Bassin.netcdf'
---> 35 Majadas_CLC_gdf = gpd.read_file(rootPathData/'BassinH2_Majadas_corrected.shp')
     37 ETa_ds = xr.load_dataset(Majadas_ETa_dataset)
     38 ETa_ds = ETa_ds.rename({"__xarray_dataarray_variable__": "ETa"})  # Rename the main variable to 'ETa'

File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/geopandas/io/file.py:294, in _read_file(filename, bbox, mask, columns, rows, engine, **kwargs)
    291             from_bytes = True
    293 if engine == "pyogrio":
--> 294     return _read_file_pyogrio(
    295         filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
    296     )
    298 elif engine == "fiona":
    299     if pd.api.types.is_file_like(filename):

File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/geopandas/io/file.py:547, in _read_file_pyogrio(path_or_bytes, bbox, mask, rows, **kwargs)
    538     warnings.warn(
    539         "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
    540         "will be removed in a future release. You can use the 'columns' keyword "
   (...)
    543         stacklevel=3,
    544     )
    545     kwargs["columns"] = kwargs.pop("include_fields")
--> 547 return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)

File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/pyogrio/geopandas.py:265, in read_dataframe(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)
    260 if not use_arrow:
    261     # For arrow, datetimes are read as is.
    262     # For numpy IO, datetimes are read as string values to preserve timezone info
    263     # as numpy does not directly support timezones.
    264     kwargs["datetime_as_string"] = True
--> 265 result = read_func(
    266     path_or_buffer,
    267     layer=layer,
    268     encoding=encoding,
    269     columns=columns,
    270     read_geometry=read_geometry,
    271     force_2d=gdal_force_2d,
    272     skip_features=skip_features,
    273     max_features=max_features,
    274     where=where,
    275     bbox=bbox,
    276     mask=mask,
    277     fids=fids,
    278     sql=sql,
    279     sql_dialect=sql_dialect,
    280     return_fids=fid_as_index,
    281     **kwargs,
    282 )
    284 if use_arrow:
    285     meta, table = result

File /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/site-packages/pyogrio/raw.py:198, in read(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)
     59 """Read OGR data source into numpy arrays.
     60 
     61 IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
   (...)
    194 
    195 """
    196 dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
--> 198 return ogr_read(
    199     get_vsi_path_or_buffer(path_or_buffer),
    200     layer=layer,
    201     encoding=encoding,
    202     columns=columns,
    203     read_geometry=read_geometry,
    204     force_2d=force_2d,
    205     skip_features=skip_features,
    206     max_features=max_features or 0,
    207     where=where,
    208     bbox=bbox,
    209     mask=_mask_to_wkb(mask),
    210     fids=fids,
    211     sql=sql,
    212     sql_dialect=sql_dialect,
    213     return_fids=return_fids,
    214     dataset_kwargs=dataset_kwargs,
    215     datetime_as_string=datetime_as_string,
    216 )

File pyogrio/_io.pyx:1240, in pyogrio._io.ogr_read()

File pyogrio/_io.pyx:220, in pyogrio._io.ogr_open()

DataSourceError: /home/ben/Nextcloud/BenCSIC/Codes/tech4agro/test_Majadas_centum_dataset/BassinH2_Majadas_corrected.shp: No such file or directory

Step 2: Corine Land Cover Visualization

x_coords_CLC = CLC['Code_CLC'].coords['x'].values
y_coords_CLC = CLC['Code_CLC'].coords['y'].values

fig, ax = plt.subplots()
im1 = ax.imshow(CLC['Code_CLC'].values, cmap='viridis', aspect='auto', origin='upper',
                    extent=[x_coords_CLC.min(), x_coords_CLC.max(), y_coords_CLC.min(), y_coords_CLC.max()])
ax.set_title('CLC Code') 
ax.set_xlabel('X Coordinate') 
ax.set_ylabel('Y Coordinate') 
ax.axis('square')  #

plt.tight_layout()
plt.show()

Step 3: Create an animated visualization of the ETa time series

ETa_ds_selec = ETa_ds.isel(time=slice(0, 25))
x_coords = ETa_ds_selec['ETa'].coords['x'].values
y_coords = ETa_ds_selec['ETa'].coords['y'].values

fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(ETa_ds_selec['ETa'].isel(band=0).isel(time=0).values, cmap='coolwarm', origin='upper',
               extent=[x_coords.min(), x_coords.max(), y_coords.min(), y_coords.max()])
ax.set_title('ETa Time Series')  # Title for the time series plot
ax.set_xlabel('X Coordinate')  # Label for the X axis
ax.set_ylabel('Y Coordinate')  # Label for the Y axis
ax.axis('square')  # Make the axis square for proper aspect ratio

cbar = fig.colorbar(im, ax=ax, orientation='vertical', extend='both', label='ETa')

def update(frame):
    im.set_data(ETa_ds_selec['ETa'].isel(band=0).isel(time=frame).values)
    ax.set_title(f'ETa Time Step: {frame}')
    return [im]

ani = FuncAnimation(fig, update, frames=len(ETa_ds_selec['time']), interval=200, blit=True)
HTML(ani.to_jshtml())  # Show the animation in the notebook
print('running delineation')
threshold_local = 0.25
threshold_regional = 0.25
time_window = 1

irr_analysis_usingET = ETAnalysis()


ET_analysis_ds = ETa_ds.isel(band=0)
ET_analysis_ds = ET_analysis_ds.sortby("time")
ET_analysis_ds['ETp'] = ETp_ds['ETp'].isel(band=0)

window_size_x = (ET_analysis_ds.x.max() - ET_analysis_ds.x.min())/10

# Run the irrigation delineation
decision_ds, event_type = irr_analysis_usingET.irrigation_delineation(
                                                                        ET_analysis_ds,
                                                                        threshold_local=threshold_local,
                                                                        threshold_regional=threshold_regional,
                                                                        time_window=time_window,
                                                                        window_size_x=window_size_x,
                                                                    )

# event_type
print('plotting mask')

from pyproj import CRS
crs = CRS.from_wkt('PROJCS["unknown",GEOGCS["WGS 84",DATUM["World Geodetic System 1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Azimuthal_Equidistant"],PARAMETER["latitude_of_center",53],PARAMETER["longitude_of_center",24],PARAMETER["false_easting",5837287.81977],PARAMETER["false_northing",2121415.69617],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]')

CLC = CLC.rio.write_crs(crs.to_wkt())
ETa_ds = ETa_ds.rio.write_crs(crs.to_wkt())
CLC_reproj = CLC.rio.reproject_match(ETa_ds)

Majadas_CLC_gdf_reproj = Majadas_CLC_gdf.to_crs(crs.to_wkt())


fig, ax = plt.subplots(1, 1, figsize=(10, 5))
CLC_reproj['Code_18'].plot.imshow(ax=ax, cmap='viridis')
ax.set_title("Majadas de Tietar Corine Land Cover")
plt.show()


mask_CLC_IrrLand = CLC_reproj['Code_CLC']==212
mask_CLC_Agroforestry = CLC_reproj['Code_CLC']==244

np.sum(mask_CLC_IrrLand)
np.sum(mask_CLC_Agroforestry)

clc_codes = utils.get_CLC_code_def()


fig, ax = plt.subplots(1, 1, figsize=(10, 5))
mask_CLC_IrrLand.plot.imshow(ax=ax, cmap='viridis')
ax.set_title("Majadas de Tietar Corine Land Cover")
plt.show()

fig, ax = plt.subplots(1, 1, figsize=(10, 5))
mask_CLC_Agroforestry.plot.imshow(ax=ax, cmap='viridis')
ax.set_title("Majadas de Tietar Corine Land Cover")
plt.show()
print('plotting time serie')

start_date = "2023-08-01"
end_date = "2023-08-15"  # Changed to cover the entire summer
    

# decision_ds_filtered = decision_ds.sel(time=slice(start_date,end_date))
decision_ds_filtered = decision_ds


decision_ds_IrrLand = (
    decision_ds_filtered
    .where(mask_CLC_IrrLand)
    .mean(['x', 'y'])
)


decision_ds_Agroforestry = (
    decision_ds_filtered
    .where(mask_CLC_Agroforestry)
    .mean(['x', 'y'])
)

decision_ds_IrrLand.data_vars

fig, axs = plt.subplots(2,1)

decision_ds_IrrLand['ETa'].plot(x='time',label='ETa',ax=axs[0])
decision_ds_IrrLand['ratio_ETap_regional_spatial_avg_time_avg'].plot(x='time',
                                                                     label='ratio_ETap_regional',
                                                                     ax=axs[1])
decision_ds_IrrLand['ratio_ETap_local'].plot(x='time',
                                            label='ratio_ETap_local',
                                            ax=axs[1])

# decision_ds_IrrLand['condRain'].plot(x='time',
#                                         label='condRain',
#                                         ax=axs[1])
plt.legend()
def plot_decision_schedule(event_type,time_steps,fig,axes):
        
    axes = axes.flatten()  # Flatten to easily iterate over
    cmap = plt.cm.colors.ListedColormap([
                                         # 'white',
                                         'green', 
                                         'red'
                                         ])
    x_values = event_type['x'].values
    y_values = event_type['y'].values
    extent = [x_values.min(), x_values.max(), y_values.min(), y_values.max()]
    for i, ax in enumerate(axes):
        if i < time_steps:  # Only plot if there is corresponding data
            data = event_type.isel(time=i).values  # or event_type.sel(time=...) if using labels
            img = ax.imshow(data, 
                            cmap=cmap, 
                            vmin=0, 
                            vmax=1, 
                            extent=extent,
                            origin='lower'
                            )
            ax.set_xlabel('x')  # Label for the x-axis
            ax.set_ylabel('y')  # Label for the y-axis
        else:
            ax.axis('off')  # Turn off empty subplots
    
    # Adjust layout to prevent overlap
    plt.tight_layout()
    cbar = fig.colorbar(plt.cm.ScalarMappable(cmap=cmap, 
                                              norm=plt.Normalize(vmin=0, 
                                                                 vmax=1)
                                              ), 
                        ax=axes, 
                        orientation='horizontal', 
                        fraction=0.02, pad=0.04)  # Adjust placement
    cbar.set_ticks([0, 1])
    cbar.set_ticklabels(['True', 'False'])
    
    pass
print('plotting decision rain')

decision_ds_filtered = decision_ds.sel(time=slice(start_date,end_date))
time_steps = decision_ds_filtered.time.size
ncols = 4
nrows = int(np.ceil(time_steps / ncols))  # Number of rows needed

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, 
                         figsize=(15, nrows * 3)
                         )
plot_decision_schedule(decision_ds_filtered['condRain'],
                       time_steps,
                       fig,axes)

for axi in axes.flatten():
    Majadas_CLC_gdf_reproj.plot(ax=axi, 
                                edgecolor="black", 
                                facecolor="none", 
                                # alpha=1,
                                )


# Majadas_CLC_gdf_reproj.plot()
event_type_sorted = event_type.sortby("time")
event_type_filtered = event_type_sorted.sel(time=slice(start_date,end_date))


event_type_IrrLand = (
    event_type_filtered
    .where(mask_CLC_IrrLand)
    # .mean(['x', 'y'])
)


event_type_Agroforestry = (
    event_type_filtered
    .where(mask_CLC_Agroforestry)
    # .mean(['x', 'y'])
)


np.sum(event_type)
np.sum(event_type_Agroforestry)

ncols = 4
time_steps = event_type_IrrLand.time.size
nrows = int(np.ceil(time_steps / ncols))  # Number of rows needed
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, 
                         figsize=(15, nrows * 3)
                         )
pltC.plot_irrigation_schedule(event_type_IrrLand,
                              time_steps,
                              fig,
                              axes
                              )


fig, axes = plt.subplots(nrows=nrows, ncols=ncols, 
                         figsize=(15, nrows * 3)
                         )
pltC.plot_irrigation_schedule(event_type_Agroforestry,
                              time_steps,
                              fig,
                              axes
                              )
    

    

# event_type_node_IN = (
#     event_type_filtered
#     .where(mask_CLCselected_land_covers{lci+1}"])
#     .mean(['x', 'y'])
# )