eorst/
lib.rs

1//! # EORST — Earth Observation and Remote Sensing Toolkit
2//!
3//! A high-performance Rust library for processing satellite imagery. Designed for
4//! researchers and engineers who need to process large geospatial raster datasets
5//! efficiently — from querying STAC catalogs to computing spectral indices across
6//! thousands of scenes.
7//!
8//! ## Quick Start
9//!
10//! [`RasterDataset`] is the main entry point. Combine it with
11//! [`rss_core`](https://crates.io/crates/rss_core) to query cloud-optimised
12//! satellite imagery and process it in parallel blocks:
13//!
14//! ```rust,ignore
15//! use std::path::PathBuf;
16//! use chrono::NaiveDate;
17//! use anyhow::Result;
18//! use ndarray::Array4;
19//!
20//! use rss_core::{DEA, query::ImageQueryBuilder, qvf::Collection, utils::{Cmp, Intersects}};
21//! use eorst::{types::BlockSize, RasterDatasetBuilder, DataSourceBuilder, RasterDataBlock};
22//!
23//! // ── 1. Query DEA Sentinel-2 ARD via STAC ──────────────────────────────────
24//! let source = DEA.clone();
25//! let query = ImageQueryBuilder::new(
26//!     source,
27//!     Collection::Sentinel2,
28//!     Intersects::Scene(vec!["56jmr"]),
29//! )
30//! .canonical_bands(["red", "nir"])
31//! .start_date(NaiveDate::parse_from_str("2022-01-01", "%Y-%m-%d")?)
32//! .end_date(NaiveDate::parse_from_str("2022-01-15", "%Y-%m-%d")?)
33//! .cloudcover((Cmp::Less, 10))
34//! .build();
35//!
36//! let output_dir = PathBuf::from("/tmp/DEA_S2");
37//! let _ = query.get(&output_dir, None, None)?;  // download scenes
38//!
39//! // ── 2. Build a RasterDataset from the downloaded scenes ───────────────────────
40//! let scene_files: Vec<_> = std::fs::read_dir(&output_dir)?
41//!     .filter_map(|e| e.ok())
42//!     .filter(|e| e.path().extension().map_or(false, |ext| ext == "tif"))
43//!     .map(|e| e.path())
44//!     .collect();
45//!
46//! let rds = RasterDatasetBuilder::<u16>::from_sources(&scene_files)
47//!     .block_size(BlockSize { cols: 2048, rows: 2048 })
48//!     .build();
49//!
50//! // ── 3. Apply a parallel worker across all blocks ──────────────────────────────
51//! fn ndvi_worker(block: &RasterDataBlock<u16>) -> Result<Array4<i16>> {
52//!     let red = block.select_layers(&["red"])?;
53//!     let nir = block.select_layers(&["nir"])?;
54//!     let red_f: ndarray::Array4<f32> = red.data.mapv(|v| v as f32);
55//!     let nir_f: ndarray::Array4<f32> = nir.data.mapv(|v| v as f32);
56//!     let ndvi = ((&nir_f - &red_f) / (&nir_f + &red_f + 1e-10)) * 10000.0;
57//!     Ok(ndvi.mapv(|v| v as i16))
58//! }
59//!
60//! rds.apply::<i16>(ndvi_worker, 8, &PathBuf::from("ndvi_output.tif"))?;
61//! ```
62//!
63//! The same pattern works for any worker function — mosaicking, band math,
64//! machine-learning classification, zonal statistics, and more.
65//!
66//! ## Core Types
67//!
68//! - **[`RasterDataset`]** — The main data structure. Holds metadata and blocks for
69//!   an entire raster. Use [`RasterDatasetBuilder`] to construct one.
70//! - **[`RasterDataBlock`]** — A block of raster data with metadata. Passed to your
71//!   worker function by [`apply`](RasterDataset::apply) and [`apply_reduction`](RasterDataset::apply_reduction).
72//! - **[`DataSourceBuilder`]** — Build data sources from files or STAC queries.
73//! - **[`Select`]** — Select layers and time slices by name from a [`RasterDataBlock`].
74//!
75//! ## Processing Methods
76//!
77//! [`RasterDataset`] provides parallel block-processing methods:
78//!
79//! - **[`apply`](RasterDataset::apply)** — Apply a worker to each block, writing results
80//!   directly to a GeoTIFF. The most common entry point.
81//! - **[`apply_with_mask`](RasterDataset::apply_with_mask)** — Apply a worker using two
82//!   datasets, where the second acts as a mask.
83//! - **[`apply_reduction`](RasterDataset::apply_reduction)** — Reduce a dimension (e.g.
84//!   mean over time) and write the result.
85//! - **[`apply_reduction_with_mask`](RasterDataset::apply_reduction_with_mask)** —
86//!   Reduce with a mask dataset.
87//! - **[`apply_mosaic`](RasterDataset::apply_mosaic)** — Mosaic the dataset to a single file.
88//!
89//! ## Highlights
90//!
91//!  - Block-based parallel processing — handles datasets larger than memory
92//!  - Works on laptop, HPC, or cloud — same code, same architecture
93//!  - On-the-fly reprojection and resolution changes
//!  - Raster value extraction at sample points
95//!  - Time series analysis and band math
96//!  - OpenCV integration via `use_opencv` feature
97//!  - LightGBM and XGBoost integration via `use_lgbm` feature
98//!  - Nix-managed reproducible environments
99//!
100//! ## Crate Status
101//!
//! - Still iterating and evolving. Breaking changes are expected between versions.
//! - The API is stabilizing. Contributions and feedback are welcome.
104//!
105//! ## Installation
106//!
107//! Add to `Cargo.toml`:
108//!
109//! ```toml
110//! [dependencies]
111//! eorst = "1.0"
112//! ```
113//!
114//! Optional features:
115//!
116//! ```toml
117//! eorst = { version = "1.0", features = ["use_opencv"] }  # OpenCV computer vision
118//! eorst = { version = "1.0", features = ["use_lgbm"] }   # LightGBM ML classification
119//! ```
120//!
121//! ## CLI
122//!
123//! The [`eors`](https://gitlab.com/jrsrp/sys/eors_workspace/-/tree/main/apps/eors) CLI
124//! provides command-line access to common workflows. See the
125//! [install docs](crate::standalone_docs) for setup instructions.
126//!
127//! ## Summary
128//!
//! This crate aims to simplify writing raster-processing pipelines in Rust.
130
131// ─── Module declarations ───
132
133/// Core type definitions for raster data handling.
134pub mod core_types;
135/// Parallel GeoTIFF writer for direct windowed writes (replaces mosaic subprocess chain).
136pub mod parallel_writer;
137/// Geospatial and array shape types.
138pub mod types;
139/// Data source definitions for raster datasets.
140pub mod data_sources;
141/// Metadata types for raster datasets.
142pub mod metadata;
143/// Block types for raster dataset processing.
144pub mod blocks;
145/// GDAL utility functions for raster processing.
146pub mod gdal_utils;
147/// Array manipulation operations for raster processing.
148pub mod array_ops;
149/// Selection and aggregation traits for raster data.
150pub mod selection;
151/// Image processing filters and OpenCV integration.
152pub mod filters;
153/// RasterDataset and RasterDatasetBuilder for creating and manipulating raster data.
154pub mod rasterdataset;
155pub mod stac_helpers;
156pub mod standalone_docs;
157/// Async I/O using async-tiff for direct S3 access.
158pub mod async_io;
159mod tests;
160
161// ─── Re-exports ───
162
163pub use core_types::{RasterData, RasterType};
164pub use data_sources::{DataSourceBuilder, DateType};
165pub use metadata::{Extent, RasterDataBlock, RasterMetadata};
166pub use blocks::RasterBlock;
167pub use crate::selection::Select;
168pub use crate::selection::SelectError;
169pub use rasterdataset::{RasterDataset, RasterDatasetBuilder};
170
171#[cfg(feature = "use_opencv")]
172pub use opencv;
173
174#[cfg(feature = "use_polars")]
175pub use polars;
176
177pub use gdal;
178
179#[cfg(feature = "use_polars")]
180pub use crate::rasterdataset::zonal_stats::save_zonal_histograms;
181
182pub use stac::ItemCollection;
183
184use env_logger::Env;
185
186// ─── Utility functions ───
187
188/// Initializes the logging system with default settings.
189pub fn init_logger() {
190    let env = Env::default().filter_or("LOG_LEVEL", "info");
191    env_logger::init_from_env(env);
192}
193
194// ─── Type aliases ───
195
196use ndarray::{Array2, Array3};
197
198/// Type alias for 2D raster block slices backed by ndarray.
199pub type RasterBlockSlice2<T> = Array2<T>;
200/// Type alias for 3D raster block slices backed by ndarray.
201pub type RasterBlockSlice3<T> = Array3<T>;