Source code for pylidar.toolbox.indexing.gridindex

Deals with creating a grid spatial index
# This file is part of PyLidar
# Copyright (C) 2015 John Armston, Pete Bunting, Neil Flood, Sam Gillingham
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <>.
from __future__ import print_function, division

import os
import sys
import copy
import numpy
import tempfile
from pylidar import lidarprocessor
from pylidar.lidarformats import spdv4
from pylidar.lidarformats import las
from pylidar.lidarformats import generic
from pylidar.basedriver import Extent
from rios import cuiprogress
from rios import pixelgrid

if sys.version_info[0] > 2:
    # hack for Python 3 which uses str instead of basestring
    # we just use basestring
    basestring = str

Number of blocks to divide the extent up into for the longest axis
if not given a blockSize.

Types of spatial indices. Copied from spdv4.

Types of pulse indexing methods. Copied from spdv4.

[docs]def createGridSpatialIndex(infile, outfile, binSize=1.0, blockSize=None, tempDir=None, extent=None, indexType=INDEX_CARTESIAN, pulseIndexMethod=PULSE_INDEX_FIRST_RETURN, wkt=None): """ Creates a grid spatially indexed file from a non spatial input file. Currently only supports creation of a SPD V4 file. Creates a tempfile for every block (using blockSize) and them merges them into the output building a spatial index as it goes. If blockSize isn't set then it is picked using BLOCKSIZE_N_BLOCKS. binSize is the size of the bins to create the spatial index. if tempDir is none a temporary directory will be created with tempfile.mkdtemp and removed at the end of processing. extent is an Extent object specifying the extent to work within. indexType is one of the INDEX_* constants. pulseIndexMethod is one of the PULSE_INDEX_* constants. wkt is the projection to use for the output. Copied from the input if not supplied. nPulsesPerChunkMerge is the number of pulses to process at a time when merging. """ removeTempDir = False if tempDir is None: tempDir = tempfile.mkdtemp() removeTempDir = True header, extent, extentList = splitFileIntoTiles(infile, binSize=binSize, blockSize=blockSize, tempDir=tempDir, extent=extent, indexType=indexType, pulseIndexMethod=pulseIndexMethod) # update header header['INDEX_TLX'] = extent.xMin header['INDEX_TLY'] = extent.yMax header['INDEX_TYPE'] = indexType header['PULSE_INDEX_METHOD'] = pulseIndexMethod header['BIN_SIZE'] = binSize if wkt is None: wkt = header['SPATIAL_REFERENCE'] if len(wkt) == 0: wkt = getDefaultWKT() indexAndMerge(extentList, extent, wkt, outfile, header) # delete the temp files for fname, extent in extentList: os.remove(fname) # we must have created this directory - remove it if removeTempDir: os.rmdir(tempDir)
[docs]def splitFileIntoTiles(infiles, binSize=1.0, blockSize=None, tempDir='.', extent=None, indexType=INDEX_CARTESIAN, pulseIndexMethod=PULSE_INDEX_FIRST_RETURN, footprint=lidarprocessor.UNION, outputFormat='SPDV4', buildPulses=False): """ Takes a filename (or list of filenames) and creates a tempfile for every block (using blockSize). If blockSize isn't set then it is picked using BLOCKSIZE_N_BLOCKS. binSize is the size of the bins to create the spatial index. indexType is one of the INDEX_* constants. pulseIndexMethod is one of the PULSE_INDEX_* constants. footprint is one of lidarprocessor.UNION or lidarprocessor.INTERSECTION and is how to combine extents if there is more than one file. outputFormat is either 'SPDV4' or 'LAS'. 'LAS' outputs only supported when input is 'LAS'. buildPulses relevant for 'LAS' and determines whether to build the pulse structure or not. returns the header of the first input file, the extent used and a list of (fname, extent) tuples that contain the information for each tempfile. """ if isinstance(infiles, basestring): infiles = [infiles] # use the first file for header. Not # clear how to combine headers from multiple inputs # or if one should. # leave setting this in case we grab it when working out the extent. firstHeader = None if extent is None: # work out from headers pixGrid = None for infile in infiles: info = generic.getLidarFileInfo(infile) header = info.header if firstHeader is None: firstHeader = header try: if indexType == INDEX_CARTESIAN: xMax = header['X_MAX'] xMin = header['X_MIN'] yMax = header['Y_MAX'] yMin = header['Y_MIN'] elif indexType == INDEX_SPHERICAL: xMax = header['AZIMUTH_MAX'] xMin = header['AZIMUTH_MIN'] yMax = header['ZENITH_MAX'] yMin = header['ZENITH_MIN'] elif indexType == INDEX_SCAN: xMax = header['SCANLINE_IDX_MAX'] xMin = header['SCANLINE_IDX_MIN'] yMax = header['SCANLINE_MAX'] yMin = header['SCANLINE_MIN'] else: msg = 'unsupported indexing method' raise generic.LiDARSpatialIndexNotAvailable(msg) except KeyError: msg = 'info for creating bounding box not available' raise generic.LiDARFunctionUnsupported(msg) newPixGrid = pixelgrid.PixelGridDefn(xMin=xMin, xMax=xMax, yMin=yMin, yMax=yMax, xRes=binSize, yRes=binSize) if pixGrid is None: pixGrid = newPixGrid elif footprint == lidarprocessor.UNION: pixGrid = pixGrid.union(newPixGrid) elif footprint == lidarprocessor.INTERSECTION: pixGrid = pixGrid.intersection(newPixGrid) else: msg = 'Unsupported footprint option' raise generic.LiDARFunctionUnsupported(msg) # TODO: we treat points as being in the block when they are >= # the min coords and < the max coords. What happens on the bottom # and right margins?? We could possibly miss points that are there. # round the coords to the nearest multiple xMin = numpy.floor(pixGrid.xMin / binSize) * binSize yMin = numpy.floor(pixGrid.yMin / binSize) * binSize xMax = numpy.ceil(pixGrid.xMax / binSize) * binSize yMax = numpy.ceil(pixGrid.yMax / binSize) * binSize extent = Extent(xMin, xMax, yMin, yMax, binSize) else: # ensure that our binSize comes from their exent binSize = extent.binSize # get the first header since we aren't doing the above info = generic.getLidarFileInfo(infiles[0]) firstHeader = info.header if blockSize is None: minAxis = min(extent.xMax - extent.xMin, extent.yMax - extent.yMin) blockSize = min(minAxis / BLOCKSIZE_N_BLOCKS, 200.0) # make it a multiple of binSize blockSize = int(numpy.ceil(blockSize / binSize)) * binSize else: # ensure that their given block size can be evenly divided by # the binSize # the modulo operator doesn't work too well with floats # so we take a different approach a = blockSize / binSize if a != int(a): msg = 'blockSize must be evenly divisible be the binSize' raise generic.LiDARInvalidData(msg) extentList = [] subExtent = Extent(extent.xMin, extent.xMin + blockSize, extent.yMax - blockSize, extent.yMax, binSize) controls = lidarprocessor.Controls() controls.setSpatialProcessing(False) tmpSuffix = '.' + outputFormat.lower() bMoreToDo = True while bMoreToDo: fd, fname = tempfile.mkstemp(suffix=tmpSuffix, dir=tempDir) os.close(fd) userClass = lidarprocessor.LidarFile(fname, generic.CREATE) if outputFormat == 'SPDV4': userClass.setLiDARDriverOption('SCALING_BUT_NO_DATA_WARNING', False) driver = spdv4.SPDV4File(fname, generic.CREATE, controls, userClass) elif outputFormat == 'LAS': driver = las.LasFile(fname, generic.CREATE, controls, userClass) else: msg = 'Unsupported output format %s' % outputFormat raise generic.LiDARFunctionUnsupported(msg) data = (copy.copy(subExtent), driver) extentList.append(data) # move it along subExtent.xMin += blockSize subExtent.xMax += blockSize if subExtent.xMin >= extent.xMax: # next line down subExtent.xMin = extent.xMin subExtent.xMax = extent.xMin + blockSize subExtent.yMax -= blockSize subExtent.yMin -= blockSize # done? bMoreToDo = subExtent.yMax > extent.yMin # ok now set up to read the input files using lidarprocessor dataFiles = lidarprocessor.DataFiles() dataFiles.inputs = [] for infile in infiles: input = lidarprocessor.LidarFile(infile, lidarprocessor.READ) # must be a better way of doing this, but this is what # translate does. We don't know what formats we are getting ahead of time info = generic.getLidarFileInfo(infile) inFormat = info.getDriverName() if inFormat == 'LAS': input.setLiDARDriverOption('BUILD_PULSES', buildPulses) dataFiles.inputs.append(input) controls = lidarprocessor.Controls() progress = cuiprogress.GDALProgressBar() progress.setLabelText('Splitting...') controls.setProgress(progress) controls.setSpatialProcessing(False) controls.setMessageHandler(lidarprocessor.silentMessageFn) otherArgs = lidarprocessor.OtherArgs() otherArgs.outList = extentList otherArgs.indexType = indexType otherArgs.pulseIndexMethod = pulseIndexMethod lidarprocessor.doProcessing(classifyFunc, dataFiles, controls=controls, otherArgs=otherArgs) # close all the output files and save their names to return newExtentList = [] for subExtent, driver in extentList: fname = driver.fname driver.close() data = (fname, subExtent) newExtentList.append(data) return firstHeader, extent, newExtentList
[docs]def getDefaultWKT(): """ When processing data in sensor or project coordinates we may not have a WKT. However, rios.pixelgrid requires something. For now return the WKT for GDA96/MGA zone 55 until we think of something better. """ from osgeo import osr sr = osr.SpatialReference() sr.ImportFromEPSG(28355) return sr.ExportToWkt()
[docs]def copyScaling(input, output): """ Copy the known scaling required fields accross. Internal method. Called from classifyFunc. """ for arrayType in (lidarprocessor.ARRAY_TYPE_PULSES, lidarprocessor.ARRAY_TYPE_POINTS, lidarprocessor.ARRAY_TYPE_WAVEFORMS): try: fields = output.getScalingColumns(arrayType) except generic.LiDARInvalidSetting: continue for field in fields: try: gain, offset = input.getScaling(field, arrayType) output.setScaling(field, arrayType, gain, offset) except generic.LiDARFileException: pass
[docs]def setScalingForCoordField(driver, srcfield, coordfield): """ Internal method to set the output scaling for range of data. """ # srcfield and coordfield might not be the same type (pulses/points) # this happens when we are using point X and Y to set pulse X_IDX, Y_IDX etc if srcfield in spdv4.PULSE_SCALED_FIELDS: gain, offset = driver.getScaling(srcfield, lidarprocessor.ARRAY_TYPE_PULSES) elif srcfield in spdv4.POINT_SCALED_FIELDS: gain, offset = driver.getScaling(srcfield, lidarprocessor.ARRAY_TYPE_POINTS) else: # we need to set these in case an unscaled variable is used for the spatial index # e.g. using pulse SCANLINE and SCANLINE_IDX to set pulse X_IDX, Y_IDX gain = 1.0 offset = 0.0 if coordfield in spdv4.PULSE_SCALED_FIELDS: driver.setScaling(coordfield, lidarprocessor.ARRAY_TYPE_PULSES, gain, offset) elif coordfield in spdv4.POINT_SCALED_FIELDS: driver.setScaling(coordfield, lidarprocessor.ARRAY_TYPE_POINTS, gain, offset)
[docs]def classifyFunc(data, otherArgs): """ Called by lidarprocessor. Looks at the input data and splits into the appropriate output files. """ for input in data.inputs: pulses = input.getPulses() points = input.getPointsByPulse() waveformInfo = input.getWaveformInfo() recv = input.getReceived() trans = input.getTransmitted() # With LAS sometimes this happens. Not sure why... if len(pulses) == 0: continue for extent, driver in otherArgs.outList: if # deal with scaling. There must be a better way to do this. copyScaling(input, driver) # TODO: should we always be able to rely on X_IDX, Y_IDX for # whatever index we are building? # No - as the values of these columns may have to change if the # properties of the spatial indexing method change if otherArgs.indexType == INDEX_CARTESIAN: xIdxFieldName = 'X' yIdxFieldName = 'Y' xIdx, yIdx = indexPulses(pulses, points, otherArgs.pulseIndexMethod) elif otherArgs.indexType == INDEX_SPHERICAL: xIdxFieldName = 'AZIMUTH' yIdxFieldName = 'ZENITH' xIdx, yIdx = pulses[xIdxFieldName], pulses[yIdxFieldName] elif otherArgs.indexType == INDEX_SCAN: xIdxFieldName = 'SCANLINE_IDX' yIdxFieldName = 'SCANLINE' xIdx, yIdx = pulses[xIdxFieldName], pulses[yIdxFieldName] else: msg = 'unsupported indexing method' raise generic.LiDARSpatialIndexNotAvailable(msg) # ensure the scaling of X_IDX & Y_IDX matches the data we are putting in it if driver.getDriverName() == 'SPDV4': # only makes sense for SPDV4 since LAS doesn't really have an X_IDX etc setScalingForCoordField(driver, xIdxFieldName, 'X_IDX') setScalingForCoordField(driver, yIdxFieldName, 'Y_IDX') # this the expression used in the spatial index building # so we are consistent. mask = ((xIdx >= extent.xMin) & (xIdx < extent.xMax) & (yIdx > extent.yMin) & (yIdx <= extent.yMax)) # subset the data pulsesSub = pulses[mask] # this is required otherwise the pulses get stripped out # when we write the pulses in spatial mode (in indexAndMerge) pulsesSub['X_IDX'] = xIdx[mask] pulsesSub['Y_IDX'] = yIdx[mask] # subset the other data also to match pointsSub = points[..., mask] waveformInfoSub = None recvSub = None transSub = None if waveformInfo is not None and waveformInfo.size > 0: waveformInfoSub = waveformInfo[...,mask] if recv is not None and recv.size > 0: recvSub = recv[:,:,mask] if trans is not None and trans.size > 0: transSub = trans[:,:,mask] driver.writeData(pulsesSub, pointsSub, transSub, recvSub, waveformInfoSub)
[docs]def indexPulses(pulses, points, pulseIndexMethod): """ Internal method to assign a point coordinates to the X_IDX and Y_IDX columns based on the user specified pulse_index_method. """ if pulseIndexMethod == PULSE_INDEX_FIRST_RETURN: if points.shape[0] > 0: xIdx = points['X'][0, ...] yIdx = points['Y'][0, ...] else: xIdx = numpy.zeros(points.shape[1],dtype=points['X'].dtype) yIdx = numpy.zeros(points.shape[1],dtype=points['Y'].dtype) elif pulseIndexMethod == PULSE_INDEX_LAST_RETURN: if points.shape[0] > 0: firstfield = points.dtype.names[0] last = points[firstfield].count(axis=0) - 1 idx = numpy.arange(last.size) xIdx = points['X'][last, idx] yIdx = points['Y'][last, idx] else: xIdx = numpy.zeros(points.shape[1],dtype=points['X'].dtype) yIdx = numpy.zeros(points.shape[1],dtype=points['Y'].dtype) else: msg = 'unsupported pulse indexing method' raise generic.LiDARPulseIndexUnsupported(msg) return xIdx, yIdx
[docs]def indexAndMerge(extentList, extent, wkt, outfile, header): """ Internal method to merge all the temporary files into the output spatially indexing as we go. """ controls = lidarprocessor.Controls() controls.setSpatialProcessing(False) # open in read mode driverExtentList = [] for fname, subExtent in extentList: userClass = lidarprocessor.LidarFile(fname, generic.READ) driver = spdv4.SPDV4File(fname, generic.READ, controls, userClass) data = (subExtent, driver) driverExtentList.append(data) # create output file userClass = lidarprocessor.LidarFile(outfile, generic.CREATE) userClass.setLiDARDriverOption('SCALING_BUT_NO_DATA_WARNING', False) controls = lidarprocessor.Controls() controls.setSpatialProcessing(True) outDriver = spdv4.SPDV4File(outfile, generic.CREATE, controls, userClass) pixGrid = pixelgrid.PixelGridDefn(xMin=extent.xMin, xMax=extent.xMax, yMin=extent.yMin, yMax=extent.yMax, projection=wkt, xRes=extent.binSize, yRes=extent.binSize) outDriver.setPixelGrid(pixGrid) # update header nrows,ncols = pixGrid.getDimensions() header['NUMBER_BINS_X'] = ncols header['NUMBER_BINS_Y'] = nrows # clobber these values since we don't want to # start with the number in the original file # they will be reset to 0 in the new file del header['NUMBER_OF_POINTS'] del header['NUMBER_OF_PULSES'] # these too del header['GENERATING_SOFTWARE'] del header['CREATION_DATETIME'] progress = cuiprogress.GDALProgressBar() progress.setLabelText('Merging...') progress.setTotalSteps(len(extentList)) progress.setProgress(0) nFilesProcessed = 0 nFilesWritten = 0 for subExtent, driver in driverExtentList: # read in all the data # NOTE: can't write data in blocks as the driver needs to be able to # sort all the data in one go. bDataWritten = False npulses = driver.getTotalNumberPulses() if npulses > 0: pulseRange = generic.PulseRange(0, npulses) driver.setPulseRange(pulseRange) pulses = driver.readPulsesForRange() points = driver.readPointsByPulse() waveformInfo = driver.readWaveformInfo() recv = driver.readReceived() trans = driver.readTransmitted() outDriver.setExtent(subExtent) if nFilesWritten == 0: copyScaling(driver, outDriver) outDriver.setHeader(header) # on create, a spatial index is created outDriver.writeData(pulses, points, trans, recv, waveformInfo) nFilesWritten += 1 # close the driver while we are here driver.close() if bDataWritten: nFilesWritten += 1 nFilesProcessed += 1 progress.setProgress(nFilesProcessed) outDriver.close()