# import libraries
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np

# Open the Landsat and air temperature datasets
landsat = xr.open_dataset("./data/small_landsat.nc")
air_temp = xr.open_dataset("./data/air.mon.mean.nc")

# Explore the Landsat dataset (a bare expression is only displayed when this
# runs as a notebook cell; in a plain script it has no visible effect)
landsat

# Explore the air temperature dataset
air_temp

# Extract the air temperature variable "air" from the Dataset as a DataArray
ta = air_temp["air"]
ta

# Alternative: read the same file directly as a DataArray; this rebinds `ta`
ta = xr.open_dataarray("./data/air.mon.mean.nc")
ta

# ta.values
# Underlying array of values, without the coordinates or attributes
data = ta.data
print(data)
print(data.shape)

# Names of the dimensions
ta.dims

# Coordinates attached to the array
ta.coords

# A single coordinate, looked up by name
ta.coords["time"]

# Attributes (metadata) attached to the array
ta.attrs
# Similar to coordinates, individual attributes can be read with .attrs["<name>"]:
# print(ta.attrs['units'])

# For example, get the shape of the data
ta.shape

# For example, index the data at the first time step and all latitudes and longitudes
# (assumes the dimension order is time, lat, lon -- check ta.dims)
ta[0, :, :]

# ta.to_netcdf("ta.nc")
# Your codes here

# Positional indexing: position 100 along the first dimension, the first 180
# positions along the second, everything along the third.
# To see which latitudes those 180 rows cover (the exercise expects 0 to 90),
# inspect: print(ta.coords["lat"][0:180])
northern_hemisphere = ta[100, :180]
plt.imshow(northern_hemisphere.values, cmap="coolwarm")

# Integer-position selection by dimension name; the same works for lat and lon
first_twelve_steps = range(12)
data_isel = ta.isel(time=first_twelve_steps)
data_isel

# Label-based selection of individual time stamps
wanted_dates = ["2023-08-01", "2023-09-01"]
data_sel = ta.sel(time=wanted_dates)
data_sel

# Label-based selection of a range of time stamps
period_1984 = slice("1984-01-01", "1984-12-01")
ta.sel(time=period_1984)

# Fixing longitude: wrap the lon coordinate into [-180, 180) and re-sort the
# array so that longitude is increasing again.
# (More on basic calculations like this one follows later.)
wrapped_lon = (ta.coords["lon"] + 180) % 360 - 180
ta = ta.assign_coords(lon=wrapped_lon).sortby("lon")

# Time series of the grid cell closest to lat=43, lon=89
nearest_neighbor = ta.sel(lat=43, lon=89, method="nearest")
plt.plot(nearest_neighbor.values, "-")
nearest_neighbor

# One time step, and the condition reused by the masking examples below
da = ta.sel(time="2023-05-01")
is_below_threshold = da < 273.15

# Keep values below the threshold; everything else becomes NaN
da_cold = da.where(is_below_threshold)
plt.imshow(da_cold.values, cmap="coolwarm")

# Replace values below the threshold with 500 (just for the sake of example)
da_hot = xr.where(is_below_threshold, 500, da)
plt.imshow(da_hot.values, cmap="coolwarm")

# The replacement can also be another array instead of a constant
replacement = ta.sel(time="1985-08-01")
da_tmp = xr.where(is_below_threshold, replacement, da)

# Time series of a single grid cell for 2010-2020. The cell is chosen by
# integer position (isel), NOT by degrees, so its real coordinates are read
# back from the selection for the plot title below.
ts_2010s = ta.sel(time=slice("2010-01-01", "2020-12-31")).isel(lat=100, lon=150)

# Default line plot
ts_2010s.plot()

# Same series with an explicit line/marker format
ts_2010s.plot.line("-*")

# Same series on a figure sized through xarray's aspect/size arguments
ts_2010s.plot(aspect=2, size=3)
cell_lat = float(ts_2010s["lat"])
cell_lon = float(ts_2010s["lon"])
plt.title(f"Temperature at lat {cell_lat:.2f}, lon {cell_lon:.2f}")
plt.xlabel("Monthly Time")
plt.ylabel("Temperature (K)")
plt.tight_layout()

# Two-dimensional plot of the first time step. The colorbar unit is taken from
# the data's own metadata; the fallback "K" matches the 273.15 threshold and
# the "Temperature (K)" axis label used elsewhere in this file.
# NOTE(review): the title assumes the first time stamp is January 1948 --
# confirm with ta.time[0].
temperature_units = ta.attrs.get("units", "K")
ta.isel(time=0).plot(
    robust=True, cbar_kwargs={"label": f"Temperature ({temperature_units})"}
)
plt.title("Temperature in January 1948")
plt.xlabel("Longitude")
plt.ylabel("Latitude")

# Faceting: one panel per time step, three panels per row
ta_sub = ta.sel(time=slice("2010-01-01", "2010-06-30"))
ta_sub.plot(col="time", col_wrap=3)

import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Draw the same time step (position 10 along time) on two map projections.
# Each entry pairs a projection with the extra keyword arguments for that plot.
map_variants = [
    (ccrs.Orthographic(-80, 35), {}),
    (ccrs.Robinson(), {"figsize": (10, 5)}),
]

for map_projection, extra_plot_options in map_variants:
    p = ta.isel(time=10).plot(
        subplot_kws=dict(projection=map_projection, facecolor="gray"),
        transform=ccrs.PlateCarree(),
        robust=True,
        **extra_plot_options,
    )

    # Overlay coastlines and country borders on the map axes
    p.axes.coastlines()
    p.axes.add_feature(cfeature.BORDERS)

# import hvplot.xarray

# ta.isel(time=10).hvplot(
#     width=800,
#     height=400,
#     cmap="fire",
#     projection=ccrs.Mollweide(),
# )

# # !pip install --upgrade ipywidgets
# ta.isel(time=range(12)).hvplot(
#     width=800,
#     height=400,
#     cmap="fire",
#     # projection=ccrs.Orthographic(-90, 30),
#     coastline=True,
#     groupby="time",
#     widget_type="scrubber",
#     widget_location="bottom",
# )

# NumPy element-wise functions can be applied directly to a DataArray.
# Here: log and sine of the air temperature (whatever they mean!)
ta_sel = ta.sel(time="2023-08-01")
log_ta, sin_ta = np.log(ta_sel), np.sin(ta_sel)

# One panel per transformed field, stacked vertically
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
panels = (
    (log_ta, "Log of Air Temperature!"),
    (sin_ta, "Sine of Air Temperature!"),
)
for panel_axis, (panel_data, panel_title) in zip(axes, panels):
    panel_data.plot(ax=panel_axis)
    panel_axis.set_title(panel_title)
plt.tight_layout()

### Normalized Difference Vegetation Index (NDVI) from the Landsat data
# NDVI = (NIR - RED) / (NIR + RED)
NIR, RED = landsat["SR_B5"], landsat["SR_B4"]
band_difference = NIR - RED
band_sum = NIR + RED
# squeeze() drops any length-1 dimensions from the result
NDVI = (band_difference / band_sum).squeeze()

# Plot the NIR band for a single acquisition date
nir_plot_options = dict(
    cmap="viridis",
    cbar_kwargs={"label": "NIR"},
    figsize=(8, 4),
)
NIR.sel(time="2023-05-04").plot(**nir_plot_options)

# Side-by-side maps of missing vs. valid cells. A dedicated figure with
# explicit axes is used so the panels never land on a figure left over from an
# earlier plot (plt.subplot() adds axes to whatever figure is current).
null_fig, (ax_missing, ax_valid) = plt.subplots(1, 2)
ta_sel.isnull().plot(ax=ax_missing)
ta_sel.notnull().plot(ax=ax_valid)

# Replace missing values with 0 before plotting
ta_sel.fillna(0).plot(robust=True)

# Mean over every dimension of the selected time step
ta_sel_mean = ta_sel.mean()
ta_sel_mean
# Try sum, std, min, max, median, quantile, etc.

# Mean over lat and lon only, leaving one value per time step.
# NOTE: this is a plain (unweighted) mean, so every grid cell counts equally.
ta_mean = ta.mean(dim=["lat", "lon"])
ta_mean.sel(time=slice("2020-01-01", "2022-12-01")).plot()

# Your codes here

# Map of the time-mean temperature over 1990-2010
ta.sel(time=slice("1990-01-01", "2010-12-01")).mean(dim="time").plot()

# Two ways to aggregate by year:
# - groupby("time.year") replaces the time dimension with a "year" dimension
# - resample(time="Y") keeps a "time" dimension
# NOTE(review): recent pandas/xarray releases deprecate the "Y" alias in favor
# of "YE"; it is kept here so older environments keep working.
ta_group = ta.groupby("time.year").mean()
ta_resample = ta.resample(time="Y").mean()

# Yearly means (same computation as ta_group, reused instead of recomputed)
annual_ta = ta_group
window_size = 5  # Define the window size for the moving average

# Calculate the moving average; center=True centers each window on its year
moving_avg = annual_ta.rolling(year=window_size, center=True).mean()

# plot the results of moving average

# your codes here

# Solution
fig, ax = plt.subplots(figsize=(10, 5))
annual_ta.mean(dim=["lat", "lon"]).plot(ax=ax, label="Annual Mean")
moving_avg.mean(dim=["lat", "lon"]).plot(
    ax=ax, label=f"{window_size}-Year Moving Average"
)
# Without a legend the label= arguments above are never shown
ax.legend()

# Resampling into 5-year bins (this rebinds ta_resample)
ta_resample = ta.resample(time="5Y").mean(dim="time")
ta_resample

# define a function to compute a linear trend of a time series (we use numpy.polyfit())
def linear_trend(y):
    """Return the least-squares linear slope of a series.

    The series is fitted against its positions ``0, 1, ..., len(y) - 1`` with
    ``numpy.polyfit`` (degree 1), so the slope is expressed per step.

    Parameters
    ----------
    y : array-like
        Values of the series, in order.

    Returns
    -------
    float
        The fitted slope, or ``nan`` when ``y`` contains a NaN or has fewer
        than two points (a slope is undefined in both cases).
    """
    y = np.asarray(y, dtype=float)
    if np.isnan(y).any():
        return np.nan
    # polyfit raises on empty input and cannot determine a line from one point
    if len(y) < 2:
        return np.nan
    x = np.arange(len(y))
    pf = np.polyfit(x, y, 1)
    # pf holds [slope, intercept]; only the slope is returned
    return pf[0]

# usage of the function: noisy samples of the line y = 2x + 3
x = np.arange(30)
y = 2 * x + 3 + np.random.randn(30) * 5
plt.plot(x, y, "o")
trend = linear_trend(y)
# linear_trend returns only the slope. A least-squares line always passes
# through (mean(x), mean(y)), so anchor the fitted line there instead of
# assuming the true intercept 3.
plt.plot(x, y.mean() + trend * (x - x.mean()), "-r")
plt.title("Linear Trend")

# Annual means, resampled along the time dimension
ta_annual = ta.resample(time="Y").mean()

# Options for apply_ufunc:
# - input_core_dims: "time" is the dimension handed to linear_trend
# - vectorize: call the function once per remaining (non-core) position
trend_options = dict(input_core_dims=[["time"]], vectorize=True)

# Linear trend of the annual means at every remaining position
trend_result = xr.apply_ufunc(linear_trend, ta_annual, **trend_options)

trend_result.plot(robust=True)

# Calculate the mask for summer months (June, July, August)
mask = (ta["time.month"] >= 6) & (ta["time.month"] <= 8)

# Apply the mask to select only summer months data
ta_summer = ta.where(mask, drop=True)

# Calculate the annual mean of summer temperatures (one value per year)
ta_summer_annual = ta_summer.groupby("time.year").mean()

# Calculate the climatological mean of summer temperatures (mean over all years)
clima_mean = ta_summer_annual.mean(dim="year")

# Calculate the summer temperature anomalies by subtracting the climatological mean
summer_anomalies = ta_summer_annual - clima_mean

# Year highlighted in both panels below
highlight_year = 2023

fig, axs = plt.subplots(ncols=2, figsize=(12, 4))

# Plot the map of summer anomalies for the highlighted year
summer_anomalies.sel(year=highlight_year).plot(
    robust=True, cmap="coolwarm", ax=axs[0]
)

# Plot the global mean of summer temperature anomalies.
# NOTE: this is a plain (unweighted) mean over lat and lon.
global_anomalies = summer_anomalies.mean(["lat", "lon"])
global_anomalies.plot(ax=axs[1])

# Add an arrow pointing to the anomaly value of the highlighted year.
# The value is converted to a plain float once and reused for both positions.
highlight_value = float(global_anomalies.sel(year=highlight_year))
axs[1].annotate(
    "Approaching values projected for 2100 :(",
    xy=(highlight_year, highlight_value),
    xytext=(1960, highlight_value - 0.5),
    arrowprops=dict(facecolor="black", arrowstyle="->"),
)

plt.tight_layout()

# Your codes here

RCC - UChicago, 2025¶

Geospatial Python Part 1: Satellite & Climate Raster Analysis¶

Instructors:¶

Commands¶

Topics¶

Geospatial Data Formats for Climate and Satellite Data¶

Key Formats¶

NetCDF (Network Common Data Form; Our focus today)¶

HDF5 (Hierarchical Data Format version 5)¶

Zarr¶

STAC (SpatioTemporal Asset Catalog)¶

Basics of Xarray¶

Xarray: Handling NetCDF¶

1. Get Data¶

2. Open the data and explore¶

Indexing and selecting data and simple plotting¶

Create Mask with `where()`¶

3. Plotting¶

One dimensional plot¶

Two dimensional plots¶

Faceting¶

More advanced plots¶

Interactive plotting¶

4. Computations¶

Practice: compute the mean global temperature from 1990 to 2010 and plot the resulting map¶

Apply a custom function to the data along dimension(s)¶

Problem 2: Calculating Annual Summer Temperature Anomaly¶

Objective:¶

Hint:¶

Procedure:¶

Note:¶

Calculate Landsat Enhanced Vegetation Index (EVI)¶

Large Data and Speed: The Next Challenge¶

RCC - UChicago, 2025¶

Geospatial Python Part 1: Satellite & Climate Raster Analysis¶

Instructors:¶

Commands¶

Topics¶

Geospatial Data Formats for Climate and Satellite Data¶

Key Formats¶

NetCDF (Network Common Data Form; Our focus today)¶

HDF5 (Hierarchical Data Format version 5)¶

Zarr¶

STAC (SpatioTemporal Asset Catalog)¶

Basics of Xarray¶

Xarray: Handling NetCDF¶

1. Get Data¶

2. Open the data and explore¶

Indexing and selecting data and simple plotting¶

Create Mask with where()¶

3. Plotting¶

One dimensional plot¶

Two dimensional plots¶

Faceting¶

More advanced plots¶

Interactive plotting¶

4. Computations¶

Practice: compute the mean global temperature from 1990 to 2010 and plot the resulting map¶

Apply a custom function to the data along dimension(s)¶

Problem 2: Calculating Annual Summer Temperature Anomaly¶

Objective:¶

Hint:¶

Procedure:¶

Note:¶

Calculate Landsat Enhanced Vegetation Index (EVI)¶

Large Data and Speed: The Next Challenge¶

Create Mask with `where()`¶