Source code for raptorstats.api

import warnings

from raptorstats.agqt import AggQuadTree
from raptorstats.io import (
    open_raster,
    open_vector,
    validate_is_north_up,
    validate_raster_vector_compatibility,
)
from raptorstats.scanline import Scanline
from raptorstats.stats import Stats


[docs] def zonal_stats( vectors, raster, stats=None, layer=0, band=1, nodata=None, affine=None, prefix=None, categorical=False, method="scanline", **kwargs, ): """Zonal statistics using the AggQuadTree or Scanline methods. The method supports quite a few input types for both vectors and rasters, with options to select band, layers, specify affine, etc. However, the loading method will make assumptions for some of them, so if your input is exotic (or you see errors), ideally you might want to transform it yourself to a north-up, one-band, no-shearing rasterio DatasetReader and a geopandas closed-polygon-only GeoDataFrame with the same CRS as the raster. Parameters ---------- vectors : str | Path | geopandas.GeoDataFrame | shapely.geometry.base.BaseGeometry | Sequence[BaseGeometry] | dict (GeoJSON-like mapping) | anything GeoPandas can read. Vector source containing the geometries. Currently only closed polygons are supported (no single points or lines). raster : str | Path | rasterio.io.DatasetReader | np.ndarray | anything rasterio can read Raster source containing the data to compute statistics on. stats : list[str] Statistics to compute. Default: ["count", "min", "max", "mean"], also supported: ["sum", "std", "median", "majority", "minority", "unique", "range", "nodata", "nan"]. layer : int | str Layer name/number for multi-layer vector sources band : int Raster band (default 1) nodata : float | None Overrides raster NODATA value affine : affine.Affine | None Required if `raster` is a NumPy array prefix : str | None Prefix every stat key (useful for merges) categorical : bool Whether to compute categorical stats (histogram) method : str Zonal statistics method to use ('agqt' or 'scanline'). Default 'scanline'. - *scanline*: builds one line per raster row, calculates the intersections with the vector layer, identifying the pixels corresponding to each vector feature, and computes the statistics. It reads the raster in a single pass, row by row, computing all the feature stats. - *agqt*: builds an AggQuadTree index for the raster, which allows for fast querying of the statistics for a grid of rectangular features. During the zonal_stats call, it identifies the rectangular features inside the vector geometries and takes the pre-computed stats from the index file, while using the scanline method for pixels inside the geometry that are not covered by the rectangular features. build_indices : bool Whether to force build (overwrite) the AggQuadTree index if it already exists. Default False. index_path : str Path to the index file (if using agqt), default None. If the file doesn't exist, it will be created. If not specified, the program will try to use a default path in the current running dir. max_depth : int Maximum depth for the AggQuadTree index (if using agqt), default 5. The index building time for the `agqt` method depends on the specified tree depth. For example, building a depth-10 quadtree requires significant time and memory, as the algorithm needs to compute statistics for about (4^11 - 1) / 3 ≈ 1.4 million rectangular features. The depth of the tree needed depends on the size of the expected features. A poorly chosen depth (too low, so that leaf nodes don't fit inside the features, or too high, making the tree unnecessarily large) can lead to slower times for the `agqt` method compared to the `scanline` method. Returns ------- list[dict] one stats-dict per input feature """ stats_conf = Stats(stats, categorical=categorical) if method == "agqt": func = AggQuadTree(**kwargs) elif method == "scanline": func = Scanline(**kwargs) else: raise ValueError(f"Unknown method: {method}. Use 'agqt' or 'scanline'.") with open_raster(raster, affine=affine, nodata=nodata, band=band) as ds: gdf = open_vector(vectors, layer=layer, affine=affine) validate_is_north_up(ds.transform) validate_raster_vector_compatibility(ds, gdf) results = func(ds, gdf, stats=stats_conf) results = stats_conf.clean_results(results) if prefix: results = [ {f"{prefix}{k}": v for k, v in res.items()} if res is not None else None for res in results ] return results
[docs] def build_agqt_index( raster, max_depth, stats=None, band=1, nodata=None, affine=None, categorical=False, index_path=None, ): """Build AggQuadTree index files for the given raster file. Parameters ---------- raster : str | Path | rasterio.io.DatasetReader | np.ndarray | anything rasterio can read max_depth : int Maximum depth for the AggQuadTree index. The index building time for the `agqt` method depends on the specified tree depth. For example, building a depth-10 quadtree requires significant time and memory, as the algorithm needs to compute statistics for about (4^11 - 1) / 3 ≈ 1.4 million rectangular features. The depth of the tree needed depends on the size of the expected features. A poorly chosen depth (too low, so that leaf nodes don't fit the features, or too high, making the tree unnecessarily large) can lead to slower times for the `agqt` method compared to the `scanline` method. stats : list[str] Statistics to compute. Default: ["count", "min", "max", "mean"], also supported: ["sum", "std", "median", "majority", "minority", "unique", "range", "nodata", "nan"]. Important: the computed stats for the indices must match the later required stats for the computation, when running zonal_stats. band : int Raster band (1-based). nodata : float | None Overrides raster NODATA value. affine : affine.Affine | None Required if `raster` is a NumPy array. categorical : bool Whether to compute categorical stats (histogram). index_path : str | None Path to the index file (if using AggQuadTree), default None. If the file doesn't exist, it will be created. If not specified, the program will try to use a default path in the current running dir. Returns ------- rtree.index.Index The AggQuadTree index object (it is already saved to a file). """ stats_conf = Stats(stats, categorical=categorical) agqt = AggQuadTree(max_depth=max_depth, index_path=index_path, build_index=True) agqt.stats = stats_conf with open_raster(raster, affine=affine, nodata=nodata, band=band) as ds: validate_is_north_up(ds.transform) if agqt._index_files_exist(): warnings.warn( f"Index files {agqt.index_path}.idx and {agqt.index_path}.dat already exist. They will be overwritten.", ) agqt._delete_index_files() agqt._compute_quad_tree(ds) return agqt.idx # Return the index object for further use or inspection