ghg_forcing_for_cmip.plotting#

Plotting functions for tutorials in documentation

Functions:

Name	Description
`plot_average_hemisphere`	Plot global, monthly average separate for hemispheres
`plot_collocated_rmse`	Plot rmse for collocated data for GB vs. EO data
`plot_coverage`	Plot GHG observations on world map
`plot_future_predictions`	Plot observed, fitted, and predicted data
`plot_global_hemisphere`	Plot global average of GHG
`plot_global_hemisphere_coverage`	Plot global, monthly-avg. ghg concentration
`plot_gridsizes`	Plot earth gridding
`plot_hemisphere`	Plot observed and predicted data aggregated over different hemispheres
`plot_locations`	Plot future years
`plot_map`	Plot world map with observation stations
`plot_monthly_average`	Plot global, monthly average of GHG concentration
`plot_prophet_components`	Plot trend and seasonality components from Prophet models.

plot_average_hemisphere #

plot_average_hemisphere(
    d_colloc: DataFrame, gas: str
) -> None

Plot global, monthly average separate for hemispheres

Parameters:

Name	Type	Description	Default
`d_colloc`	`DataFrame`	dataframe including grouping variable with hemisphere levels (Northern, Southern, Tropics) whereby Northern: >30°, Southern: < -30°, and Tropics: <30°, >-30°	required
`gas`	`str`	either "co2" or "ch4"	required

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_average_hemisphere(d_colloc: pd.DataFrame, gas: str) -> None:
    """
    Plot global, monthly average separate for hemispheres

    Parameters
    ----------
    d_colloc :
        dataframe including grouping variable with
        hemisphere levels (Northern, Southern, Tropics)
        whereby Northern: >30°, Southern: < -30°, and
        Tropics: <30°, >-30°

    gas :
        either "co2" or "ch4"
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "plotting", requirement="matplotlib"
        ) from exc

    try:
        import seaborn as sns
    except ImportError as exc:
        raise MissingOptionalDependencyError("plotting", requirement="seaborn") from exc

    conditions = [d_colloc["lat"] > 30, d_colloc["lat"] < -30]  # noqa: PLR2004
    d_colloc["hemisphere"] = np.select(
        conditions, ["Northern", "Southern"], default="Tropics"
    )

    _, axs = plt.subplots(1, 3, constrained_layout=True, figsize=(11, 3), sharey=True)
    d_avg = (
        d_colloc.groupby(["year", "month", "hemisphere"])
        .agg({"value_gb": "mean", "value_eo": "mean", "time_fractional": "mean"})
        .reset_index()
    )
    for i, hemi in enumerate(["Northern", "Southern", "Tropics"]):
        d = d_avg[d_avg.hemisphere == hemi]
        sns.lineplot(
            d,
            x="time_fractional",
            y="value_gb",
            linewidth=2.0,
            ax=axs[i],
            label="ground-based",
        )
        sns.lineplot(
            d,
            x="time_fractional",
            y="value_eo",
            linewidth=2.0,
            ax=axs[i],
            label="satellite",
        )
        axs[i].set_xlabel("time")
        axs[i].set_title(np.where(hemi == "Tropics", hemi, f"{hemi} hemisphere"))
        axs[0].set_ylabel(gas)
        axs[i].legend(frameon=False)

plot_collocated_rmse #

plot_collocated_rmse(
    d_colloc_co2: DataFrame,
    d_colloc_ch4: DataFrame,
    measure: str,
) -> Any

Plot rmse for collocated data for GB vs. EO data

Parameters:

Name	Type	Description	Default
`d_colloc_co2`	`DataFrame`	collocated data for co2	required
`d_colloc_ch4`	`DataFrame`	collocated data for ch4	required
`measure`	`str`	either "rmse" or "dcor"	required

Returns:

Type	Description
`Any`	axs of matplotlib.pyplot

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_collocated_rmse(
    d_colloc_co2: pd.DataFrame, d_colloc_ch4: pd.DataFrame, measure: str
) -> Any:
    """
    Plot rmse for collocated data for GB vs. EO data

    Parameters
    ----------
    d_colloc_co2 :
        collocated data for co2

    d_colloc_ch4 :
        collocated data for ch4

    measure :
        either "rmse" or "dcor"

    Returns
    -------
    :
        axs of matplotlib.pyplot
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "plotting", requirement="matplotlib"
        ) from exc

    try:
        import seaborn as sns
    except ImportError as exc:
        raise MissingOptionalDependencyError("plotting", requirement="seaborn") from exc

    df_measure = pd.DataFrame(
        compute_discrepancy_collocated(d_colloc_co2, "co2", measure)
    ).merge(
        compute_discrepancy_collocated(d_colloc_ch4, "ch4", measure), on="site_code"
    )
    df_measure["sd_co2"] = np.sqrt(df_measure["var_co2"])
    df_measure["sd_ch4"] = np.sqrt(df_measure["var_ch4"])

    _, axs = plt.subplots(2, 1, constrained_layout=True, figsize=(8, 5))
    for meas in ["rmse", "bias", "sd"]:
        sns.lineplot(
            data=df_measure.sort_values(by=f"{meas}_co2"),
            y=f"{meas}_co2",
            x="site_code",
            ax=axs[0],
            label=meas.upper(),
        )
        sns.lineplot(
            data=df_measure.sort_values(by=f"{meas}_ch4"),
            y=f"{meas}_ch4",
            x="site_code",
            ax=axs[1],
        )
    for i in range(2):
        axs[i].tick_params(axis="both", labelsize=8)
        axs[i].set_xlabel(measure.upper())
        axs[i].tick_params(axis="x", rotation=90)
        axs[i].axhline(0, linestyle="dashed", color="darkgrey", lw=1)
    axs[1].set_ylabel("")
    axs[1].set_title(
        ("Methane (avg. RMSE:" + f"{df_measure[f'{measure}_ch4'].mean():.2f})"),
        fontsize="medium",
    )
    axs[0].set_title(
        ("Carbon Dioxide (avg. RMSE:" + f"{df_measure[f'{measure}_co2'].mean():.2f})"),
        fontsize="medium",
    )
    axs[0].set_ylabel("CO2 in ppm")
    axs[1].set_ylabel("CH4 in ppb")
    return axs

plot_coverage #

plot_coverage(
    df: DataFrame,
    year: int,
    grid_size: int,
    gas: str,
    unit: str,
    ms: int = 10,
    lw: float = 0.5,
    vmin: Optional[float] = None,
    vmax: Optional[float] = None,
    xlim: Optional[list[float]] = None,
    ylim: Optional[list[float]] = None,
) -> Any

Plot GHG observations on world map

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataset including ghg data	required
`year`	`int`	year for which data should be visualized	required
`grid_size`	`int`	size of single grid cells	required
`gas`	`str`	name of greenhouse gas	required
`unit`	`str`	unit of greenhouse gas	required
`ms`	`int`	marker size, by default 10	`10`
`lw`	`float`	red border used to visualize observed data, by default 0.5	`0.5`
`vmin`	`Optional[float]`	minimum scale value for legend, if `None` will be determined by data	`None`
`vmax`	`Optional[float]`	maximum scale value for legend, if `None` will be determined by data	`None`
`xlim`	`Optional[list[float]]`	range of x-axis that should be visualized, can be used to "zoom" into the map	`None`
`ylim`	`Optional[list[float]]`	range of y-axis that should be visualized, can be used to "zoom" into the map	`None`

Returns:

Type	Description
`Any`	fig, axs

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_coverage(  # noqa: PLR0913
    df: pd.DataFrame,
    year: int,
    grid_size: int,
    gas: str,
    unit: str,
    ms: int = 10,
    lw: float = 0.5,
    vmin: Optional[float] = None,
    vmax: Optional[float] = None,
    xlim: Optional[list[float]] = None,
    ylim: Optional[list[float]] = None,
) -> Any:
    """
    Plot GHG observations on world map

    Parameters
    ----------
    df :
        dataset including ghg data

    year :
        year for which data should be visualized

    grid_size :
        size of single grid cells

    gas :
        name of greenhouse gas

    unit :
        unit of greenhouse gas

    ms :
        marker size, by default 10

    lw :
        red border used to visualize observed data,
        by default 0.5

    vmin :
        minimum scale value for legend,
        if `None` will be determined by data

    vmax :
        maximum scale value for legend,
        if `None` will be determined by data

    xlim :
        range of x-axis that should be visualized,
        can be used to "zoom" into the map

    ylim :
        range of y-axis that should be visualized,
        can be used to "zoom" into the map

    Returns
    -------
    :
        fig, axs
    """
    # compute grid-cell average
    df_gb_binned = (
        df[df.year == year]
        .groupby(["lat", "lon"])
        .agg({"value_gb": "mean", "obs_gb": "max"})
        .reset_index()
    )

    gdf_gb = geopandas.GeoDataFrame(
        df_gb_binned,
        geometry=geopandas.points_from_xy(df_gb_binned.lon, df_gb_binned.lat),
        crs="EPSG:4326",
    )

    world = geopandas.read_file(geodatasets.get_path("naturalearth.land"))

    legend_kwds = {
        "shrink": 0.8,
        "label": f"{gas.upper()} [{unit}]",
        "orientation": "vertical",
    }

    fig, axs = plt.subplots(1, 1, figsize=(12, 4))
    world.plot(ax=axs, facecolor="none", edgecolor="black")

    gdf_gb.plot(
        ax=axs,
        column=df_gb_binned.value_gb,
        marker="s",
        markersize=ms,
        vmin=vmin,
        vmax=vmax,
        zorder=0,
        legend=True,
        legend_kwds=legend_kwds,
    )
    gdf_highlight = gdf_gb[gdf_gb["obs_gb"] == True]  # noqa

    if not gdf_highlight.empty:
        gdf_highlight.plot(
            ax=axs,
            facecolor="none",
            edgecolor="red",
            linewidth=lw,
            marker="s",
            markersize=ms,
            zorder=0,
        )

    for hl in np.arange(0, 90 + grid_size, grid_size):
        axs.axhline(float(hl), color="lightgrey", lw=0.5)
        axs.axhline(-float(hl), color="lightgrey", lw=0.5)
    for vl in np.arange(0, 180 + grid_size, grid_size):
        axs.axvline(float(vl), color="lightgrey", lw=0.5)
        axs.axvline(-float(vl), color="lightgrey", lw=0.5)

    if xlim is not None:
        axs.set_xlim(xlim[0], xlim[1])

    if ylim is not None:
        axs.set_ylim(ylim[0], ylim[1])

    axs.set_title(f"{grid_size} x {grid_size} grid size (red squares: observed GB)")
    axs.set_xlabel("longitude")
    axs.set_ylabel("latitude")

    return fig, axs

plot_future_predictions #

plot_future_predictions(
    df_observed: DataFrame,
    df_coverage: DataFrame,
    df_future_years: DataFrame,
    min_year: int,
    split_value: int,
    figsize: tuple[int],
    gas: str,
    unit: str,
    day: int = 15,
    quantiles: tuple[float, float] = (0.025, 0.975),
) -> Any

Plot observed, fitted, and predicted data

Monthly aggregated across hemispheres and for the global mean.

Parameters:

Name	Type	Description	Default
`df_observed`	`DataFrame`	observed GHG data	required
`df_coverage`	`DataFrame`	predicted data within observed year range showing full coverage	required
`df_future_years`	`DataFrame`	predicted data for future years	required
`min_year`	`int`	minimum year for truncating the x-axis	required
`split_value`	`int`	value used to differentiate between southern, northern hemisphere, and tropics	required
`figsize`	`tuple[int]`	size of plot	required
`gas`	`str`	name of GHG	required
`unit`	`str`	unit of GHG	required
`day`	`int`	day used for creating a date variable	`15`
`quantiles`	`tuple[float, float]`	quantiles used for uncertainty shading	`(0.025, 0.975)`

Returns:

Type	Description
`Any`	fig, axs

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_future_predictions(  # noqa: PLR0913
    df_observed: pd.DataFrame,
    df_coverage: pd.DataFrame,
    df_future_years: pd.DataFrame,
    min_year: int,
    split_value: int,
    figsize: tuple[int],
    gas: str,
    unit: str,
    day: int = 15,
    quantiles: tuple[float, float] = (0.025, 0.975),
) -> Any:
    """
    Plot observed, fitted, and predicted data

    Monthly aggregated across hemispheres and for the global mean.

    Parameters
    ----------
    df_observed :
        observed GHG data

    df_coverage :
        predicted data within observed year range showing full coverage

    df_future_years :
        predicted data for future years

    min_year :
        minimum year for truncating the x-axis

    split_value :
        value used to differentiate between southern,
        northern hemisphere, and tropics

    figsize :
        size of plot

    gas :
        name of GHG

    unit :
        unit of GHG

    day :
        day used for creating a date variable

    quantiles :
        quantiles used for uncertainty shading

    Returns
    -------
    :
        fig, axs
    """

    def add_date(df: pd.DataFrame) -> pd.DataFrame:
        """Assign date variable"""
        return df.assign(date=pd.to_datetime(df[["year", "month"]].assign(day=day)))

    df_full = df_coverage[df_coverage.year > min_year].copy()
    df_full = preprocessing.add_hemisphere(df_full, split_value)
    df_full = add_date(df_full)

    df_future_years = df_future_years[df_future_years.year > min_year].copy()
    df_future_years = preprocessing.add_hemisphere(df_future_years, split_value)
    df_future_years = add_date(df_future_years)

    df_observed = df_observed[df_observed.year > min_year].copy()
    df_observed = preprocessing.add_hemisphere(df_observed, split_value)
    df_observed = add_date(df_observed)

    dfs_hemi = []
    dv_names = ["value_gb", "value_gb_pred", "value"]

    input_dfs = [df_full, df_future_years, df_observed]

    for df, dv_name in zip(input_dfs, dv_names):
        dfs_hemi.append(
            df.groupby(["date", "hemisphere"])
            .agg(
                mean=(dv_name, "mean"),
                lower=(dv_name, lambda x: x.quantile(quantiles[0])),
                upper=(dv_name, lambda x: x.quantile(quantiles[1])),
            )
            .reset_index()
        )

    fig, axs = plt.subplots(3, 1, figsize=figsize, constrained_layout=True, sharex=True)

    plot_configs = [
        (0, "northern", "tab:blue"),
        (1, "southern", "tab:orange"),
        (2, "tropics", "tab:green"),
    ]

    fit_hemi, fut_hemi, obs_hemi = dfs_hemi

    for c, region, color in plot_configs:
        ax = axs[c]

        df_fit = fit_hemi[fit_hemi.hemisphere == region]
        df_fut = fut_hemi[fut_hemi.hemisphere == region]
        df_obs = obs_hemi[obs_hemi.hemisphere == region]
        title = region.capitalize()

        sns.scatterplot(
            data=df_obs,
            x="date",
            y="mean",
            color="black",
            s=10,
            alpha=0.6,
            ax=ax,
            label="Observed" if c == 0 else None,
        )

        ax.plot(
            df_fit.date,
            df_fit["mean"],
            color=color,
            lw=1,
            label="Fitted" if c == 0 else None,
        )

        ax.plot(
            df_fut.date,
            df_fut["mean"],
            color=color,
            lw=2,
            label="Prediction" if c == 0 else None,
        )
        ax.fill_between(
            df_fut.date,
            df_fut.lower,
            df_fut.upper,
            color=color,
            alpha=0.1,
            edgecolor="none",
        )

        ax.set_title(title)
        ax.set_ylabel(f"{gas.upper()} [{unit}]")
        ax.set_xlabel("")
        ax.spines[["right", "top"]].set_visible(False)

    axs[0].legend(frameon=False, loc="upper left", fontsize="small")
    for i in range(3):
        axs[i].set_xlabel("date")
    return fig, axs

plot_global_hemisphere #

plot_global_hemisphere(
    df: DataFrame,
    gas: str,
    unit: str,
    figsize: Optional[tuple[int, int]] = None,
) -> Any

Plot global average of GHG

grouped by northern, southern hemispheres and tropics

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	dataframe	required
`gas`	`str`	name of greenhouse gas	required
`unit`	`str`	unit of greenhouse gas	required
`figsize`	`Optional[tuple[int, int]]`	size of figure	`None`

Returns:

Type	Description
`Any`	fig, axs

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_global_hemisphere(
    df: pd.DataFrame, gas: str, unit: str, figsize: Optional[tuple[int, int]] = None
) -> Any:
    """
    Plot global average of GHG

    grouped by northern, southern hemispheres and tropics

    Parameters
    ----------
    df :
        dataframe

    gas :
        name of greenhouse gas

    unit :
        unit of greenhouse gas

    figsize :
        size of figure

    Returns
    -------
    :
        fig, axs
    """
    fig, axs = plt.subplots(1, 1, figsize=figsize)
    sns.lineplot(
        data=df, x="date", y="value_gb_pred", hue="hemisphere", lw=2, alpha=0.5, ax=axs
    )
    for hemi, label in zip(df.hemisphere.unique(), ["observed", None, None]):
        sns.scatterplot(
            data=df[df.hemisphere == hemi],
            x="date",
            y="value_gb",
            color="black",
            s=5,
            lw=0,
            ax=axs,
            label=label,
        )
    axs.set_ylabel(f"{gas.upper()} in {unit}")
    axs.spines[["right", "top"]].set_visible(False)
    axs.legend(frameon=False, handlelength=0.5, ncol=3)

    return fig, axs

plot_global_hemisphere_coverage #

plot_global_hemisphere_coverage(
    df_collocated: DataFrame,
    df_coverage: DataFrame,
    gas: str,
    unit: str,
    split_value: int,
    figsize: Optional[tuple[int, int]] = None,
    quantiles: tuple[float, float] = (0.025, 0.975),
) -> Any

Plot global, monthly-avg. ghg concentration

Plotting for different hemispheres and compared to observed data.

Parameters:

Name	Type	Description	Default
`df_collocated`	`DataFrame`	collocated dataframe including observed data (i.e., gb_value)	required
`df_coverage`	`DataFrame`	gap filled dataframe including predicted data that covers full spatio-temporal range	required
`gas`	`str`	name of greenhouse gas	required
`unit`	`str`	unit of greenhouse gas	required
`split_value`	`int`	splitting value used to differentiate between northern, southern hemisphere and tropics	required
`figsize`	`Optional[tuple[int, int]]`	size of figure	`None`
`quantiles`	`tuple[float, float]`	quantiles used for uncertainty shading	`(0.025, 0.975)`

Returns:

Type	Description
`Any`	fig, axs

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_global_hemisphere_coverage(  # noqa: PLR0913
    df_collocated: pd.DataFrame,
    df_coverage: pd.DataFrame,
    gas: str,
    unit: str,
    split_value: int,
    figsize: Optional[tuple[int, int]] = None,
    quantiles: tuple[float, float] = (0.025, 0.975),
) -> Any:
    """
    Plot global, monthly-avg. ghg concentration

    Plotting for different hemispheres and compared to
    observed data.

    Parameters
    ----------
    df_collocated :
        collocated dataframe including observed data
        (i.e., gb_value)

    df_coverage :
        gap filled dataframe including predicted data
        that covers full spatio-temporal range

    gas :
        name of greenhouse gas

    unit :
        unit of greenhouse gas

    split_value :
        splitting value used to differentiate between
        northern, southern hemisphere and tropics

    figsize :
        size of figure

    quantiles :
        quantiles used for uncertainty shading

    Returns
    -------
    :
        fig, axs
    """
    df_pred = preprocessing.add_hemisphere(df_coverage, split_value=split_value)
    df_pred = (
        df_pred.groupby(["date", "hemisphere"])
        .agg(
            mean=("value_gb", "mean"),
            lower=("value_gb", lambda x: x.quantile(quantiles[0])),
            upper=("value_gb", lambda x: x.quantile(quantiles[1])),
        )
        .reset_index()
    )

    df_observed = preprocessing.add_hemisphere(df_collocated, split_value=split_value)
    df_observed = (
        df_observed.groupby(["date", "hemisphere"])
        .agg({"value_gb": "mean"})
        .reset_index()
    )

    unique_hemis = df_pred["hemisphere"].unique()
    palette = dict(
        zip(unique_hemis, sns.color_palette("tab10", n_colors=len(unique_hemis)))
    )

    fig, axs = plt.subplots(3, 1, figsize=figsize, constrained_layout=True, sharex=True)

    for i, hemi in enumerate(unique_hemis):
        subset = df_pred[df_pred["hemisphere"] == hemi]
        color = palette[hemi]

        axs[i].plot(subset["date"], subset["mean"], color=color, lw=2)
        axs[i].set_title(hemi)

        axs[i].fill_between(
            subset["date"],
            subset["lower"],
            subset["upper"],
            color=color,
            alpha=0.3,
            edgecolor="none",
        )

    for i, (hemi, label) in enumerate(
        zip(df_pred.hemisphere.unique(), ["observed", None, None])
    ):
        sns.scatterplot(
            data=df_observed[df_observed.hemisphere == hemi],
            x="date",
            y="value_gb",
            color="black",
            s=5,
            lw=0,
            ax=axs[i],
            label=label,
            zorder=3,
        )

        axs[i].set_ylabel(f"{gas.upper()} in {unit}")
        axs[i].spines[["right", "top"]].set_visible(False)

    axs[0].legend(frameon=False, handlelength=0.5, ncol=3)

    return fig, axs

plot_gridsizes #

plot_gridsizes(
    df: DataFrame, grid_size: int, subset: bool
) -> Any

Plot earth gridding

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	raw ground-based data frame	required
`grid_size`	`int`	size of grid cell; should be CONFIG.GRID_CELL_SIZE	required
`subset`	`bool`	if true shows only Europe if false shows Earth	required

Returns:

Type	Description
`Any`	fig, axs from plot

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_gridsizes(df: pd.DataFrame, grid_size: int, subset: bool) -> Any:
    """
    Plot earth gridding

    Parameters
    ----------
    df :
        raw ground-based data frame

    grid_size :
        size of grid cell;
        should be CONFIG.GRID_CELL_SIZE

    subset :
        if true shows only Europe
        if false shows Earth

    Returns
    -------
    :
        fig, axs from plot

    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "plotting", requirement="matplotlib"
        ) from exc

    try:
        import geopandas
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "plotting", requirement="geopandas"
        ) from exc

    try:
        from geodatasets import get_path
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "plotting", requirement="geodatasets"
        ) from exc

    # compute grid-cell average
    df_binned = utils.weighted_average(df, ["lat", "lon"])

    # min,max to standardize colorbar
    vmin, vmax = np.min(df_binned.value), np.max(df_binned.value)

    # raw data points
    gdf = geopandas.GeoDataFrame(
        df,
        geometry=geopandas.points_from_xy(df.longitude, df.latitude),
        crs="EPSG:4326",
    )

    # binned data points
    gdf2 = geopandas.GeoDataFrame(
        df_binned,
        geometry=geopandas.points_from_xy(df_binned.lon, df_binned.lat),
        crs="EPSG:4326",
    )

    world = geopandas.read_file(get_path("naturalearth.land"))

    fig, axs = plt.subplots(1, 1)
    world.plot(ax=axs, color="lightgrey", edgecolor="grey")
    if subset:
        gdf.plot(
            ax=axs,
            column=df.value,
            marker="s",
            markersize=5,
            zorder=4,
            vmin=vmin,
            vmax=vmax,
            legend=True,
            legend_kwds={
                "shrink": 0.8,
                "label": f"{df.gas.iloc[0].upper()} [{df.unit.iloc[0]}]",
                "orientation": "vertical",
            },
        )
        gdf2.plot(
            ax=axs,
            column=df_binned.value,
            marker="s",
            markersize=50,
            zorder=2,
            vmin=vmin,
            vmax=vmax,
            edgecolor="black",
        )
    else:
        gdf2.plot(
            ax=axs,
            column=df_binned.value,
            marker="s",
            markersize=int((5 * grid_size) / 2),
            zorder=4,
            vmin=vmin,
            vmax=vmax,
            legend=True,
            legend_kwds={
                "shrink": 0.5,
                "label": f"{df.gas.iloc[0].upper()} [{df.unit.iloc[0]}]",
                "orientation": "vertical",
            },
        )

    for hl in np.arange(0, 90 + grid_size, grid_size):
        axs.axhline(float(hl), color="black", lw=0.5)
        axs.axhline(-float(hl), color="black", lw=0.5)
    for vl in np.arange(0, 180 + grid_size, grid_size):
        axs.axvline(float(vl), color="black", lw=0.5)
        axs.axvline(-float(vl), color="black", lw=0.5)

    if subset:
        axs.set_xlim(-10, 40)
        axs.set_ylim(30, 70)

    axs.set_title(f"{grid_size} x {grid_size} grid size")
    axs.set_xlabel("longitude")
    axs.set_ylabel("latitude")

    return fig, axs

plot_hemisphere #

plot_hemisphere(
    df_pred: DataFrame,
    df_obs: DataFrame,
    gas: str,
    unit: str,
    year_min: int,
    split_value: int,
    figsize: tuple[int],
    day: int = 15,
) -> Any

Plot observed and predicted data aggregated over different hemispheres

Parameters:

Name	Type	Description	Default
`df_pred`	`DataFrame`	predicted data, spanning full coverage	required
`df_obs`	`DataFrame`	observed ground-based data	required
`gas`	`str`	name of greenhouse gas	required
`unit`	`str`	unit of greenhouse gas measurement	required
`year_min`	`int`	minimum year used for showing x-range	required
`split_value`	`int`	value used to differentiate between northern, southern hemisphere and tropics	required
`figsize`	`tuple[int]`	plot size	required
`day`	`int`	day used for constructing date variable	`15`

Returns:

Type	Description
`Any`	fig, axs

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_hemisphere(  # noqa: PLR0913
    df_pred: pd.DataFrame,
    df_obs: pd.DataFrame,
    gas: str,
    unit: str,
    year_min: int,
    split_value: int,
    figsize: tuple[int],
    day: int = 15,
) -> Any:
    """
    Plot observed and predicted data aggregated over different hemispheres

    Parameters
    ----------
    df_pred :
        predicted data, spanning full coverage

    df_obs :
        observed ground-based data

    gas :
        name of greenhouse gas

    unit :
        unit of greenhouse gas measurement

    year_min :
        minimum year used for showing x-range

    split_value :
        value used to differentiate between
        northern, southern hemisphere and tropics

    figsize :
        plot size

    day :
        day used for constructing date variable

    Returns
    -------
    Any
        fig, axs
    """
    df_coverage = preprocessing.add_hemisphere(df_pred, split_value=split_value)
    df_observed = preprocessing.add_hemisphere(df_obs, split_value=split_value)
    df_observed["date"] = pd.to_datetime(df_observed[["year", "month"]].assign(day=day))
    df_observed = (
        df_observed[df_observed.year > year_min]
        .groupby(["date", "hemisphere"])
        .agg({"value": "mean"})
        .reset_index()
    )

    fig, axs = plt.subplots(1, 1, figsize=figsize)
    sns.lineplot(
        data=df_coverage.groupby(["date", "hemisphere"])
        .agg({"value_gb": "mean"})
        .reset_index(),
        x="date",
        y="value_gb",
        hue="hemisphere",
        ax=axs,
        lw=3,
        alpha=0.4,
    )
    for hemi in df_observed.hemisphere.unique():
        sns.scatterplot(
            data=df_observed[df_observed.hemisphere == hemi],
            x="date",
            y="value",
            color="black",
            ax=axs,
            lw=0,
            s=5,
        )
    axs.legend(frameon=False, handlelength=0.5, ncol=3)
    axs.set_ylabel(f"{gas.upper()} in {unit}")
    axs.spines[["right", "top"]].set_visible(False)

    return fig, axs

plot_locations #

plot_locations(
    df_observed: DataFrame,
    df_fitted: DataFrame,
    year_min: int,
    figsize: tuple[int],
    gas: str,
    unit: str,
    locations: DataFrame,
    df_predicted: Optional[DataFrame] = None,
    day: int = 15,
    quantiles: tuple[float, float] = (0.025, 0.975),
) -> Any

Plot future years

Parameters:

Name	Type	Description	Default
`df_observed`	`DataFrame`	observed data set	required
`df_fitted`	`DataFrame`	fitted data to observed data	required
`year_min`	`int`	minimum year used for truncating the x-axis	required
`figsize`	`tuple[int]`	size of figure	required
`gas`	`str`	name of greenhouse gas	required
`unit`	`str`	unit of greenhouse gas	required
`locations`	`DataFrame`	selected locations to plot	required
`df_predicted`	`Optional[DataFrame]`	predict future data	`None`
`day`	`int`	day used for creating a date variable, default is 15	`15`
`quantiles`	`tuple[float, float]`	quantiles used for uncertainty shading	`(0.025, 0.975)`

Returns:

Type	Description
`Any`	fig, axs

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_locations(  # noqa: PLR0913
    df_observed: pd.DataFrame,
    df_fitted: pd.DataFrame,
    year_min: int,
    figsize: tuple[int],
    gas: str,
    unit: str,
    locations: pd.DataFrame,
    df_predicted: Optional[pd.DataFrame] = None,
    day: int = 15,
    quantiles: tuple[float, float] = (0.025, 0.975),
) -> Any:
    """
    Plot future years

    Parameters
    ----------
    df_observed :
        observed data set

    df_fitted :
        fitted data to observed data

    year_min :
        minimum year used for truncating the x-axis

    figsize :
        size of figure

    gas :
        name of greenhouse gas

    unit :
        unit of greenhouse gas

    locations :
        selected locations to plot

    df_predicted :
        predict future data

    day :
        day used for creating a date variable, default is 15

    quantiles :
        quantiles used for uncertainty shading

    Returns
    -------
    :
        fig, axs
    """
    df_observed["date"] = pd.to_datetime(df_observed[["year", "month"]].assign(day=day))
    df_fitted["date"] = pd.to_datetime(df_fitted[["year", "month"]].assign(day=day))

    fig, axs = plt.subplots(
        4, 3, constrained_layout=True, sharey=True, sharex=True, figsize=figsize
    )
    k = 0
    for j in range(4):
        for i in range(3):
            observed_loc = df_observed[
                (df_observed.lat == locations.lat[k])
                & (df_observed.lon == locations.lon[k])
                & (df_observed.year > year_min)
            ]
            fitted_loc = df_fitted[
                (df_fitted.lat == locations.lat[k])
                & (df_fitted.lon == locations.lon[k])
                & (df_fitted.year > year_min)
            ]
            fitted_loc = (
                fitted_loc.groupby(["date"])
                .agg(
                    mean=("value_gb", "mean"),
                    lower=("value_gb", lambda x: x.quantile(quantiles[0])),
                    upper=("value_gb", lambda x: x.quantile(quantiles[1])),
                )
                .reset_index()
            )

            if df_predicted is not None:
                df_predicted["date"] = pd.to_datetime(
                    df_predicted[["year", "month"]].assign(day=day)
                )

                predicted_loc = df_predicted[
                    (df_predicted.lat == locations.lat[k])
                    & (df_predicted.lon == locations.lon[k])
                    & (df_predicted.year > year_min)
                ]
                predicted_loc = (
                    predicted_loc.groupby(["date"])
                    .agg(
                        mean=("value_gb_pred", "mean"),
                        lower=("value_gb_pred", lambda x: x.quantile(quantiles[0])),
                        upper=("value_gb_pred", lambda x: x.quantile(quantiles[1])),
                    )
                    .reset_index()
                )

            if (i == 0) & (j == 0):
                label_obs, label_fit, label_pred = "observed", "fitted", "predicted"
            else:
                label_obs, label_fit, label_pred = None, None, None

            sns.scatterplot(
                data=observed_loc,
                x="date",
                y="value",
                label=label_obs,
                s=5,
                lw=0,
                color="black",
                zorder=2,
                ax=axs[j, i],
            )
            sns.lineplot(
                data=fitted_loc,
                x="date",
                y="mean",
                label=label_fit,
                zorder=0,
                lw=2,
                alpha=0.5,
                ax=axs[j, i],
            )
            axs[j, i].fill_between(
                fitted_loc["date"],
                fitted_loc["lower"],
                fitted_loc["upper"],
                alpha=0.3,
                edgecolor="none",
            )
            if df_predicted is not None:
                sns.lineplot(
                    data=predicted_loc,
                    x="date",
                    y="mean",
                    label=label_pred,
                    zorder=3,
                    ax=axs[j, i],
                )
                axs[j, i].fill_between(
                    predicted_loc["date"],
                    predicted_loc["lower"],
                    predicted_loc["upper"],
                    alpha=0.3,
                    edgecolor="none",
                )
            axs[j, i].set_title(
                f"site: {locations.site_code[k]} , lat: {locations.lat[k]} , lon: {locations.lon[k]}"  # noqa: E501
            )
            axs[j, i].spines[["right", "top"]].set_visible(False)
            k += 1
        axs[j, 0].set_ylabel(f"{gas.upper()} in {unit}")
    axs[0, 0].legend(frameon=False, handlelength=0.5)

    return fig, axs

plot_map #

plot_map(
    d: DataFrame,
    title: str,
    axs: Any,
    lon_value: str = "longitude",
    lat_value: str = "latitude",
    marker: str = "x",
    markersize: int = 20,
    figsize: tuple[int, int] = (8, 3),
) -> Any

Plot world map with observation stations

Parameters:

Name	Type	Description	Default
`d`	`DataFrame`	dataframe with ghg concentrations and lat, lon information	required
`title`	`str`	title of the plot	required
`axs`	`Any`	axes of matplotlib	required
`lon_value`	`str`	name of longitudinal variable (column)	`'longitude'`
`lat_value`	`str`	name of latitudinal variable (column)	`'latitude'`
`marker`	`str`	shape of plot markers	`'x'`
`markersize`	`int`	size of plot markers	`20`
`figsize`	`tuple[int, int]`	size of world map	`(8, 3)`

Returns:

Type	Description
`Any`	axes from matplotlib pyplot

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_map(  # noqa: PLR0913
    d: pd.DataFrame,
    title: str,
    axs: Any,
    lon_value: str = "longitude",
    lat_value: str = "latitude",
    marker: str = "x",
    markersize: int = 20,
    figsize: tuple[int, int] = (8, 3),
) -> Any:
    """
    Plot world map with observation stations

    Parameters
    ----------
    d :
        dataframe with ghg concentrations and lat, lon information

    title :
        title of the plot

    axs :
        axes of matplotlib

    lon_value :
        name of longitudinal variable (column)

    lat_value :
        name of latitudinal variable (column)

    marker :
        shape of plot markers

    markersize :
        size of plot markers

    figsize :
        size of world map

    Returns
    -------
    :
        axes from matplotlib pyplot
    """
    try:
        import geopandas
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "plotting", requirement="geopandas"
        ) from exc

    try:
        from geodatasets import get_path
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "plotting", requirement="geodatasets"
        ) from exc

    gdf = geopandas.GeoDataFrame(
        d,
        geometry=geopandas.points_from_xy(d[lon_value], d[lat_value]),
        crs="EPSG:4326",
    )

    world = geopandas.read_file(get_path("naturalearth.land"), engine="fiona")
    world.plot(ax=axs, color="white", edgecolor="grey")
    gdf.plot(figsize=figsize, ax=axs, color="red", marker=marker, markersize=markersize)
    axs.set_title(title)
    return axs

plot_monthly_average #

plot_monthly_average(
    d: DataFrame,
    gas: str,
    axs: Any,
    label_eo: str = "satellite",
    label_gb: str = "ground_based",
) -> Any

Plot global, monthly average of GHG concentration

Parameters:

Name	Type	Description	Default
`d`	`DataFrame`	dataframe with raw data	required
`gas`	`str`	either "ch4" or "co2"	required
`axs`	`Any`	axis of plt.subplots object	required
`label_eo`	`str`	legend in plot for earth observations, by default "satellite"	`'satellite'`
`label_gb`	`str`	legend in plot for ground-based data, by default "ground_based"	`'ground_based'`

Returns:

Type	Description
`Any`	axis object from matplotlib.pyplot

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_monthly_average(
    d: pd.DataFrame,
    gas: str,
    axs: Any,
    label_eo: str = "satellite",
    label_gb: str = "ground_based",
) -> Any:
    """
    Plot global, monthly average of GHG concentration

    Parameters
    ----------
    d :
        dataframe with raw data

    gas :
        either "ch4" or "co2"

    axs :
        axis of plt.subplots object

    label_eo :
        legend in plot for earth observations,
        by default "satellite"

    label_gb :
        legend in plot for ground-based data,
        by default "ground_based"

    Returns
    -------
    :
        axis object from matplotlib.pyplot
    """
    try:
        import seaborn as sns
    except ImportError as exc:
        raise MissingOptionalDependencyError("plotting", requirement="seaborn") from exc

    d["time_fractional"] = d.year + d.month / 12
    d_avg = (
        d.groupby(["year", "month"])
        .agg({"value_gb": "mean", "value_eo": "mean", "time_fractional": "mean"})
        .reset_index()
    )

    sns.lineplot(data=d_avg, x="time_fractional", y="value_eo", ax=axs, label=label_eo)
    sns.lineplot(data=d_avg, x="time_fractional", y="value_gb", ax=axs, label=label_gb)
    axs.set_xlabel("time")
    axs.set_ylabel(gas)
    axs.legend(frameon=False)
    axs.spines["right"].set_visible(False)
    axs.spines["top"].set_visible(False)
    return axs

plot_prophet_components #

plot_prophet_components(
    components: dict[str, DataFrame],
    gas: str,
    unit: str,
    x_years: tuple[int, int],
    figsize: Optional[tuple[int, int]] = None,
) -> Any

Plot trend and seasonality components from Prophet models.

Creates a figure with two rows: trend components (top) and seasonality components (bottom), with separate panels for each region (Southern, Tropical, Northern).

Parameters:

Name	Type	Description	Default
`components`	`dict[str, DataFrame]`	Dictionary with keys 'southern', 'tropical', 'northern', each containing a DataFrame with columns 'ds', 'trend', 'yearly'.	required
`gas`	`str`	Name of greenhouse gas (e.g., 'co2', 'ch4').	required
`unit`	`str`	Unit of measurement (e.g., 'ppm', 'ppb').	required
`x_years`	`tuple[int, int]`	Tuple of start and end years for the x-axis.	required
`figsize`	`Optional[tuple[int, int]]`	Size of the figure. If None, uses (12, 8).	`None`

Returns:

Type	Description
`Any`	fig, axs tuple from matplotlib.

Source code in src/ghg_forcing_for_cmip/plotting.py

def plot_prophet_components(
    components: dict[str, pd.DataFrame],
    gas: str,
    unit: str,
    x_years: tuple[int, int],
    figsize: Optional[tuple[int, int]] = None,
) -> Any:
    """Plot trend and seasonality components from Prophet models.

    Creates a figure with two rows: trend components (top) and
    seasonality components (bottom), with separate panels for
    each region (Southern, Tropical, Northern).

    Parameters
    ----------
    components
        Dictionary with keys 'southern', 'tropical', 'northern',
        each containing a DataFrame with columns 'ds', 'trend', 'yearly'.
    gas
        Name of greenhouse gas (e.g., 'co2', 'ch4').
    unit
        Unit of measurement (e.g., 'ppm', 'ppb').

    x_years
        Tuple of start and end years for the x-axis.
    figsize
        Size of the figure. If None, uses (12, 8).

    Returns
    -------
    Any
        fig, axs tuple from matplotlib.
    """
    if figsize is None:
        figsize = (12, 8)

    fig, axs = plt.subplots(
        2, 3, figsize=figsize, constrained_layout=True, sharex="col"
    )

    region_order = ["southern", "tropical", "northern"]
    region_labels = ["Southern", "Tropical", "Northern"]
    colors = ["tab:orange", "tab:green", "tab:blue"]

    # Plot trend components (top row)
    for i, (region, label, color) in enumerate(
        zip(region_order, region_labels, colors)
    ):
        if region in components:
            df = components[region]
            axs[0, i].plot(df["ds"], df["trend"], color=color, lw=2)
            axs[0, i].set_title(f"{label} Hemisphere")
            axs[0, i].set_ylabel(f"Trend [{unit}]")
            axs[0, i].spines[["right", "top"]].set_visible(False)
            axs[0, i].grid(True, alpha=0.3)

    # Plot seasonality components (bottom row)
    for i, (region, label, color) in enumerate(
        zip(region_order, region_labels, colors)
    ):
        if region in components:
            df = components[region]
            axs[1, i].plot(df["ds"], df["yearly"], color=color, lw=2)
            axs[1, i].set_title(f"{label} Hemisphere")
            axs[1, i].set_ylabel(f"Seasonality [{unit}]")
            axs[1, i].set_xlabel("Date")
            axs[1, i].spines[["right", "top"]].set_visible(False)
            axs[1, i].grid(True, alpha=0.3)
            axs[1, i].axhline(y=0, color="black", linestyle="--", lw=0.5, alpha=0.5)

    fig.suptitle(
        f"{gas.upper()} Prophet Model Components: Trend and Seasonality",
        fontsize=14,
        y=1.02,
    )

    xlim_start = pd.Timestamp(f"{x_years[0]}-01-01")
    xlim_end = pd.Timestamp(f"{x_years[1]}-12-31")

    tick_years = [
        pd.Timestamp(f"{year}-01-01") for year in range(x_years[0], x_years[1], 5)
    ]

    for row in axs:
        for ax in row:
            ax.set_xlim(xlim_start, xlim_end)
            ax.set_xticks(tick_years)
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))  # type: ignore[no-untyped-call]
            ax.tick_params(axis="x", labelsize=8)

    if fig._suptitle is not None:  # type: ignore[attr-defined]
        current_text = fig._suptitle.get_text()  # type: ignore[attr-defined]
        fig.suptitle(current_text, fontsize=14, y=1.08)

    return fig, axs