Source code for flusstools.fuzzycorr.prepro

"""

Hints:
    - many class methods could be imported from geotools
    - already removed: clip_raster, which duplicated a function in raster_mgmt
"""


from geotools import *


class FuzzyPreProcessor:
    """Parent pre-processing structure for the comparison of numeric maps

    :param df: pandas.DataFrame, can be obtained by reading the textfile as pandas dataframe
    :param attribute (str): name of the attribute to burn in the raster (ex.: deltaZ, Z)
    :param crs (str): coordinate reference system
    :param nodatavalue (float): value to indicate nodata cells
    :param res (float): resolution of the cell (cell size), is the same for x and y
    :param ulc: tuple of floats, upper left corner coordinate, optional
    :param lrc: tuple of floats, lower right corner coordinate, optional
    """

    def __init__(self, df, attribute, crs, nodatavalue, res=None, ulc=(np.nan, np.nan), lrc=(np.nan, np.nan)):
        if not isinstance(attribute, str):
            print("ERROR: attribute must be a string - check the name in your textfile")

        self.crs = pyproj.CRS(crs)
        self.attribute = attribute
        self.nodatavalue = nodatavalue

        # Standardize the dataframe
        df.dropna(how='any', inplace=True, axis=0)
        # Create the dictionary with new label names and then rename for standardization
        new_names = {df.columns[0]: 'x', df.columns[1]: 'y', df.columns[2]: self.attribute}
        self.df = df.rename(columns=new_names)

        # Create geodataframe from the dataframe
        gdf = geopandas.GeoDataFrame(
            self.df, geometry=geopandas.points_from_xy(self.df.x, self.df.y))
        gdf.crs = self.crs
        self.gdf = gdf

        self.x = gdf.geometry.x.values
        self.y = gdf.geometry.y.values
        self.z = gdf[attribute].values

        if np.isfinite(ulc[0]) and np.isfinite(lrc[0]):
            self.xmax = lrc[0]
            self.xmin = ulc[0]
            self.ymax = ulc[1]
            self.ymin = lrc[1]
        else:
            self.xmax = self.gdf.geometry.x.max()
            self.xmin = self.gdf.geometry.x.min()
            self.ymax = self.gdf.geometry.y.max()
            self.ymin = self.gdf.geometry.y.min()

        self.extent = (self.xmin, self.xmax, self.ymin, self.ymax)

        if res is not None and np.isfinite(res):
            self.res = res
        else:
            # if res is not passed, use the x-extent divided by 1000 as cell size
            self.res = (self.xmax - self.xmin) / 1000

        self.ncol = int(np.ceil((self.xmax - self.xmin) / self.res))  # delx
        self.nrow = int(np.ceil((self.ymax - self.ymin) / self.res))  # dely
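    # Illustrative usage sketch (assumption, not part of the original module):
    # instantiate the pre-processor from a point text file. The file name
    # 'vali_meas_2013.csv', the column order (x, y, attribute), and the parameter
    # values below are hypothetical and only serve as an example.
    #
    #   import pandas as pd
    #   df = pd.read_csv('vali_meas_2013.csv')
    #   pre = FuzzyPreProcessor(df, attribute='dz', crs='EPSG:25833',
    #                           nodatavalue=-9999, res=0.5)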
    def points_to_grid(self):
        """Creates a grid of new points in the target resolution

        :returns: array of size nrow, ncol

        Hints:
            Read more at http://chris35wills.github.io/gridding_data/
        """
        # any points outside of this range are considered outliers and not used
        hrange = ((self.ymin, self.ymax), (self.xmin, self.xmax))

        zi, yi, xi = np.histogram2d(self.y, self.x, bins=(int(self.nrow), int(self.ncol)),
                                    weights=self.z, range=hrange)
        counts, _, _ = np.histogram2d(self.y, self.x, bins=(int(self.nrow), int(self.ncol)),
                                      range=hrange)

        # ignore division-by-zero warnings for empty cells, then restore the previous settings
        old_settings = np.seterr(divide='ignore', invalid='ignore')
        zi = np.divide(zi, counts)
        np.seterr(**old_settings)

        zi = np.ma.masked_invalid(zi)
        array = np.flipud(np.array(zi))  # flip rows so the array is oriented north-up
        return array
    def norm_array(self, method='linear'):
        """Normalizes the raw data in equally distanced points depending on the selected resolution

        :returns: interpolated and normalized array with selected resolution

        Hint:
            Read more at https://github.com/rosskush/skspatial
        """
        array = self.points_to_grid()
        x = np.arange(0, self.ncol)  # creates 1d array with values [0, ncol[
        y = np.arange(0, self.nrow)

        # mask invalid values (ex.: np.inf or np.nan)
        array = np.ma.masked_invalid(array)

        # creates a grid of coordinates (x, y) based on the x and y provided
        xx, yy = np.meshgrid(x, y)

        # get only the valid values
        x1 = xx[~array.mask]  # takes only unmasked points
        y1 = yy[~array.mask]
        newarr = array[~array.mask]

        out_array = interpolate.griddata((x1, y1), newarr.ravel(), (xx, yy),
                                         method=method, fill_value=self.nodatavalue)
        return out_array
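    # Illustrative sketch (assumption, not part of the original module): the raw
    # point cloud is first binned onto the target grid and then interpolated to
    # fill empty cells; 'pre' refers to the hypothetical instance created above.
    #
    #   raw_grid = pre.points_to_grid()                   # masked array with gaps
    #   interpolated = pre.norm_array(method='linear')    # gap-free array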
    def random_raster(self, raster_file, save_ascii=True, **kwargs):
        """Creates a raster of randomly generated values

        :param raster_file (str): path and filename of the output raster (*.tif)
        :param save_ascii (bool): True to also save an ASCII raster (*.asc)
        :kwarg minmax: tuple of floats, (zmin, zmax) min and max ranges for random values
        :returns: rasterio dataset of random values within the given range, of the same size and shape as the original
        """
        if kwargs.get('minmax') is None:
            zmin, zmax = self.z.min(), self.z.max()
        else:
            zmin, zmax = kwargs['minmax']

        array = np.random.uniform(low=zmin, high=zmax, size=(self.nrow, self.ncol))

        if '.' not in raster_file[-4:]:
            raster_file += '.tif'

        transform = rio.transform.from_origin(self.xmin, self.ymax, self.res, self.res)
        new_dataset = rio.open(raster_file, 'w', driver='GTiff',
                               height=array.shape[0], width=array.shape[1], count=1,
                               dtype=array.dtype, crs=self.crs, transform=transform,
                               nodata=self.nodatavalue)
        print('The array has size: ', np.shape(array))
        new_dataset.write(array, 1)
        new_dataset.close()

        if save_ascii:
            map_asc = str(Path(raster_file[0:-4] + '.asc'))
            gdal.Translate(map_asc, raster_file, format='AAIGrid')

        return new_dataset
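    # Illustrative sketch (assumption): generate a random benchmark raster either
    # within the data range or within an explicit (zmin, zmax) range via the
    # 'minmax' keyword; file names are hypothetical.
    #
    #   pre.random_raster('random_within_data_range.tif')
    #   pre.random_raster('random_custom_range.tif', minmax=(-0.5, 0.5))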
    def plain_raster(self, shapefile, raster_file, res):
        """Converts a shapefile (*.shp) to a GeoTIFF raster without normalizing

        :param shapefile (str): filename with path of the input shapefile (*.shp)
        :param raster_file (str): filename with path of the output raster (*.tif)
        :param res (float): resolution of the cell
        :returns None: saves the raster in the default directory
        """
        if '.' not in shapefile[-4:]:
            shapefile += '.shp'
        if '.' not in raster_file[-4:]:
            raster_file += '.tif'

        source_ds = ogr.Open(shapefile)
        source_layer = source_ds.GetLayer()
        x_min, x_max, y_min, y_max = source_layer.GetExtent()

        # Create target GeoTIFF
        cols = int((x_max - x_min) / res)
        rows = int((y_max - y_min) / res)
        _raster = gdal.GetDriverByName('GTiff').Create(raster_file, cols, rows, 1, gdal.GDT_Float32)
        # the origin is the upper left corner, so the y pixel size must be negative
        _raster.SetGeoTransform((x_min, res, 0, y_max, 0, -res))
        _band = _raster.GetRasterBand(1)
        _band.SetNoDataValue(self.nodatavalue)

        # Rasterize the selected attribute of the shapefile into the band
        gdal.RasterizeLayer(_raster, [1], source_layer, options=['ATTRIBUTE=' + self.attribute])
    def array2raster(self, array, raster_file, save_ascii=True):
        """Saves an interpolated array as a georeferenced raster

        :param array: numpy.ndarray to be written, e.g. the output of norm_array
        :param raster_file (str): path to save the raster file
        :param save_ascii (bool): True to also save an ASCII raster (*.asc)
        :returns: rasterio dataset of the saved raster

        Hint:
            Function will be moved to ``geotools/raster_mgmt`` in a future release (operated by Bea)
        """
        if '.' not in raster_file[-4:]:
            raster_file += '.tif'

        transform = rio.transform.from_origin(self.xmin, self.ymax, self.res, self.res)
        new_dataset = rio.open(raster_file, 'w', driver='GTiff',
                               height=array.shape[0], width=array.shape[1], count=1,
                               dtype=array.dtype, crs=self.crs, transform=transform,
                               nodata=self.nodatavalue)
        print(np.shape(array))
        new_dataset.write(array, 1)
        new_dataset.close()

        if save_ascii:
            map_asc = str(Path(raster_file[0:-4] + '.asc'))
            gdal.Translate(map_asc, raster_file, format='AAIGrid')

        return new_dataset
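    # Illustrative sketch (assumption): typical workflow for rasterizing the
    # normalized point data; the output file name is hypothetical.
    #
    #   arr = pre.norm_array(method='linear')
    #   pre.array2raster(arr, 'measurement_2013.tif', save_ascii=True)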
    def create_polygon(self, shape_polygon, alpha=np.nan):
        """Creates a polygon surrounding a cloud of shapepoints

        :param shape_polygon (str): path to save the shapefile
        :param alpha (float): alpha value controlling how tightly the alphashape (polygon) fits the point cloud
        :returns None: saves the polygon (*.shp) with the selected filename

        Hint:
            Function can be moved to geotools/shp_mgmt
        """
        if np.isfinite(alpha):
            try:
                polygon = alphashape.alphashape(self.gdf, alpha)
                polygon.crs = self.crs
                polygon.to_file(shape_polygon)
                print('Polygon *.shp saved successfully.')
            except FileNotFoundError as e:
                print(e)
        else:
            try:
                polygon = alphashape.alphashape(self.gdf)
            except FileNotFoundError as e:
                print(e)
            else:
                polygon.crs = self.crs
                polygon.to_file(shape_polygon)
                print('Polygon *.shp saved successfully.')
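# Illustrative sketch (assumption, not part of the original module): derive a
# concave hull around the measurement points, e.g. to later clip rasters to the
# surveyed area; the alpha value and file name are hypothetical.
#
#   pre.create_polygon('survey_area.shp', alpha=4.5)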
class CategorizationPreProcessor:
    """Pre-processing structure for categorizing (classifying) an existing raster into class bins

    :param raster (str): path of the raster to be categorized
    """

    def __init__(self, raster):
        self.raster = raster
        with rio.open(self.raster) as src:
            raster_np = src.read(1, masked=True)
            self.nodatavalue = src.nodata  # store the nodatavalue of the raster
            self.meta = src.meta.copy()
        self.array = raster_np
    def nb_classes(self, n_classes):
        """Generates class bins based on the Natural Breaks method

        :param n_classes: integer, number of classes
        :returns: list of optimized bins
        """
        # Classification based on Natural Breaks
        array_values = self.array[~self.array.mask].ravel()
        breaks = mc.NaturalBreaks(array_values, k=n_classes)
        # bins are half-open intervals (], (], ... (], always including the right edge
        print('The upper bounds of the classes are:', breaks.bins)
        print('Number of counts for each class, respectively:', breaks.counts)
        print('max: ', array_values.max(), 'min: ', array_values.min())
        return breaks.bins
    def categorize_raster(self, class_bins, map_out, save_ascii=True):
        """Classifies the raster according to the classification bins

        :param class_bins: list of floats with the class bin edges
        :param map_out (str): path and filename of the output (classified) raster
        :param save_ascii (bool): True to also save an ASCII raster (*.asc)
        :returns None: saves the classified raster in the chosen directory
        """
        # Classify the original image array (np.digitize assigns nodatavalues to class 0)
        raster_fi = np.ma.filled(self.array, fill_value=-np.inf)
        # bins[i-1] < array <= bins[i]
        raster_class = np.digitize(raster_fi, class_bins, right=True)

        # Assign nodatavalues back to the array
        raster_ma = np.ma.masked_where(raster_class == 0, raster_class, copy=True)

        # Fill nodatavalues into the array
        raster_ma_fi = np.ma.filled(raster_ma, fill_value=self.nodatavalue)

        if raster_ma_fi.min() == self.nodatavalue or type(raster_ma_fi) != np.ma.MaskedArray:
            with rio.open(map_out, 'w', **self.meta) as outf:
                outf.write(raster_ma_fi.astype(rio.float64), 1)
        else:
            raise TypeError("Error filling NoDataValue to raster file")

        if save_ascii:
            map_asc = str(Path(map_out[0:-4] + '.asc'))
            gdal.Translate(map_asc, map_out, format='AAIGrid')
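# Illustrative sketch (assumption, not part of the original module): categorize a
# continuous raster into n classes with Natural Breaks and write the classified
# map; file names and the number of classes are hypothetical.
#
#   cat = CategorizationPreProcessor('measurement_2013.tif')
#   bins = cat.nb_classes(n_classes=5)
#   cat.categorize_raster(bins, 'measurement_2013_classified.tif', save_ascii=True)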