Python实现分段读取和保存遥感数据
作者:等待着冬天的风
当遇到批量读取大量遥感数据进行运算的时候,如果不进行分段读取操作的话,电脑内存可能面临着不够使用的情况,所以我们要进行分段读取数据然后进行运算,运算结束之后把这段数据保存成tif文件,本文介绍了Python实现分段读取和保存遥感数据,需要的朋友可以参考下
1 分段读取数据
如图所示,有三个这样的数据,且该数据为5600行6800列,我们可以分成10个批次分段读取该TIF数据,10个批次以此为0,560,1120,1680,2240,2800,3360,3920,4480,5040,5600。
代码实现:
import os import numpy as np from osgeo import gdal, gdalnumeric def read_tif(filepath): dataset = gdal.Open(filepath) col = dataset.RasterXSize#图像长度 row = dataset.RasterYSize#图像宽度 geotrans = dataset.GetGeoTransform()#读取仿射变换 proj = dataset.GetProjection()#读取投影 data = dataset.ReadAsArray()#转为numpy格式 data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return [col, row, geotrans, proj, data] def read_tif02(file): data = gdalnumeric.LoadFile(file) data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return data def get_all_file_name(ndvi_file): list1=[] if os.path.isdir(ndvi_file): fileList = os.listdir(ndvi_file) for f in fileList: file_name= ndvi_file+"\\"+f list1.append(file_name) return list1 else: return [] if __name__ == '__main__': file_ndvi = r"D:\AAWORK\work\研究方向\研究方向01作物分类内容\风云数据\MERSI-II植被指数旬产品(250M)\NDVI主合成" file_out = r"D:\AAWORK\work\2021NDVISUM.tif" ndvi_file_list = get_all_file_name(file_ndvi) col00, row00, geotrans00, proj00, data_00_ndvi = read_tif(ndvi_file_list[0]) data_01_ndvi = read_tif02(ndvi_file_list[1]) data_02_ndvi = read_tif02(ndvi_file_list[2]) list_row = [0,560,1120,1680,2240,2800,3360,3920,4480,5040,5600] for index,i in enumerate(list_row): if index <= len(list_row)-2: print(list_row[index],list_row[index+1]) #分段进行操作 # sum_list = get_section(list_row[index],list_row[index+1],col00,data_00_ndvi,data_01_ndvi,data_02_ndvi) # 分段进行保存 # save_section(sum_list, list_row[index], list_row[index+1], col00,data_00_ndvi,data_01_ndvi,data_02_ndvi)
2 实现分批读取数据以及进行计算
拿到开始的行和结束的行数,进行分批读取数据并进行计算,(这里求和求的是整数,如有需要的话可以自己更改)代码如下:
import os import tensorflow as tf import numpy as np import pandas as pd from osgeo import gdal, gdalnumeric def get_sum_list(data_list): list1 = [] for data in data_list: sum = 0 for d in data: if not np.isnan(d): sum = sum+d list1.append(int(sum)) return list1 def read_tif(filepath): dataset = gdal.Open(filepath) col = dataset.RasterXSize#图像长度 row = dataset.RasterYSize#图像宽度 geotrans = dataset.GetGeoTransform()#读取仿射变换 proj = dataset.GetProjection()#读取投影 data = dataset.ReadAsArray()#转为numpy格式 data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return [col, row, geotrans, proj, data] def read_tif02(file): data = gdalnumeric.LoadFile(file) data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return data def get_all_file_name(ndvi_file): list1=[] if os.path.isdir(ndvi_file): fileList = os.listdir(ndvi_file) for f in fileList: file_name= ndvi_file+"\\"+f list1.append(file_name) return list1 else: return [] def get_nan_sum(ndvi_list): """ 得到NAN数据的个数 :param ndvi_list: :return: """ count = 0 for ndvi in ndvi_list: if np.isnan(ndvi): count = count+1 return count def get_section(row0, row1, col1,data1,data2,data3): """ 分段读取数据,读取的数据进行计算 :param row0: :param row1: :param col1: :param data1: :param data2: :param data3: :return: """ list1 = [] for i in range(row0, row1): # 行 for j in range(0, col1): # 列 ndvi_list = [] ndvi_list.append(data1[i][j]) ndvi_list.append(data2[i][j]) ndvi_list.append(data3[i][j]) if get_nan_sum(ndvi_list)>1: pass else: list1.append(ndvi_list) ndvi_list = None sum_list = get_sum_list(list1) list1 = None return sum_list if __name__ == '__main__': file_ndvi = r"D:\AAWORK\work\研究方向\研究方向01作物分类内容\风云数据\MERSI-II植被指数旬产品(250M)\NDVI主合成" file_out = r"D:\AAWORK\work\2021NDVISUM.tif" ndvi_file_list = get_all_file_name(file_ndvi) col00, row00, geotrans00, proj00, data_00_ndvi = read_tif(ndvi_file_list[0]) data_01_ndvi = read_tif02(ndvi_file_list[1]) data_02_ndvi = read_tif02(ndvi_file_list[2]) list_row = [0,560,1120,1680,2240,2800,3360,3920,4480,5040,5600] for index,i in enumerate(list_row): if index <= len(list_row)-2: print(list_row[index],list_row[index+1]) #分段进行操作 sum_list = get_section(list_row[index],list_row[index+1],col00,data_00_ndvi,data_01_ndvi,data_02_ndvi) print(np.array(sum_list)) # 分段进行保存 # save_section(sum_list, list_row[index], list_row[index+1], col00,data_00_ndvi,data_01_ndvi,data_02_ndvi)
3 实现分批保存成TIF文件(所有完整代码)
在2中已经得到了每一批的list结果,我们拿到list结果之后,可以进行保存成tif文件。代码如下:
import os import numpy as np from osgeo import gdal, gdalnumeric def get_sum_list(data_list): list1 = [] for data in data_list: sum = 0 for d in data: if not np.isnan(d): sum = sum+d list1.append(int(sum)) return list1 def read_tif(filepath): dataset = gdal.Open(filepath) col = dataset.RasterXSize#图像长度 row = dataset.RasterYSize#图像宽度 geotrans = dataset.GetGeoTransform()#读取仿射变换 proj = dataset.GetProjection()#读取投影 data = dataset.ReadAsArray()#转为numpy格式 data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return [col, row, geotrans, proj, data] def read_tif02(file): data = gdalnumeric.LoadFile(file) data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return data def get_all_file_name(ndvi_file): list1=[] if os.path.isdir(ndvi_file): fileList = os.listdir(ndvi_file) for f in fileList: file_name= ndvi_file+"\\"+f list1.append(file_name) return list1 else: return [] def save_tif(data, file, output): """ 保存成tif :param data: :param file: :param output: :return: """ ds = gdal.Open(file) shape = data.shape driver = gdal.GetDriverByName("GTiff") dataset = driver.Create(output, shape[1], shape[0], 1, gdal.GDT_Float32)#保存的数据类型 dataset.SetGeoTransform(ds.GetGeoTransform()) dataset.SetProjection(ds.GetProjection()) dataset.GetRasterBand(1).WriteArray(data) def get_nan_sum(ndvi_list): """ 得到NAN数据的个数 :param ndvi_list: :return: """ count = 0 for ndvi in ndvi_list: if np.isnan(ndvi): count = count+1 return count def get_section(row0, row1, col1,data1,data2,data3): """ 分段读取数据,读取的数据进行计算 :param row0: :param row1: :param col1: :param data1: :param data2: :param data3: :return: """ list1 = [] for i in range(row0, row1): # 行 for j in range(0, col1): # 列 ndvi_list = [] ndvi_list.append(data1[i][j]) ndvi_list.append(data2[i][j]) ndvi_list.append(data3[i][j]) if get_nan_sum(ndvi_list)>1: pass else: list1.append(ndvi_list) ndvi_list = None sum_list = get_sum_list(list1) list1 = None return sum_list def save_section(sum_list, row0, row1, col1,data1,data2,data3): """ 保存分段的数据 :param sum_list: :param row0: :param row1: :param col1: :param data1: :param data2: :param data3: :return: """ file = r"D:\AAWORK\work\研究方向\研究方向01作物分类内容\风云数据\MERSI-II植被指数旬产品(250M)\kongbai_zhu_250m.tif"#这是一个空白数据,每个像元的值为0 data = read_tif02(file) m = 0 for i in range(row0, row1): # 行 for j in range(0, col1): # 列 ndvi_list = [] ndvi_list.append(data1[i][j]) ndvi_list.append(data2[i][j]) ndvi_list.append(data3[i][j]) if get_nan_sum(ndvi_list)>1: pass else: data[i][j] = sum_list[m] m = m + 1 save_tif(data,file,file_out.replace(".tif","_"+str(row0)+"_"+str(row1)+".tif")) data = None if __name__ == '__main__': file_ndvi = r"D:\AAWORK\work\研究方向\研究方向01作物分类内容\风云数据\MERSI-II植被指数旬产品(250M)\NDVI主合成" file_out = r"D:\AAWORK\work\2021NDVISUM.tif" ndvi_file_list = get_all_file_name(file_ndvi) col00, row00, geotrans00, proj00, data_00_ndvi = read_tif(ndvi_file_list[0]) data_01_ndvi = read_tif02(ndvi_file_list[1]) data_02_ndvi = read_tif02(ndvi_file_list[2]) list_row = [0,560,1120,1680,2240,2800,3360,3920,4480,5040,5600] for index,i in enumerate(list_row): if index <= len(list_row)-2: print(list_row[index],list_row[index+1]) #分段进行操作 sum_list = get_section(list_row[index],list_row[index+1],col00,data_00_ndvi,data_01_ndvi,data_02_ndvi) print(np.array(sum_list)) # 分段进行保存 save_section(sum_list, list_row[index], list_row[index+1], col00,data_00_ndvi,data_01_ndvi,data_02_ndvi)
4 分段TIF整合到一个TIF
我们要把上述10个TIF文件整合到一个TIF文件里,方法很多,我这里提供一个方法,供大家使用,代码如下:
import os from osgeo import gdalnumeric, gdal import numpy as np def get_all_file_name(file): list1=[] if os.path.isdir(file): fileList = os.listdir(file) for f in fileList: file_name= file+"\\"+f list1.append(file_name) return list1 else: return [] def read_tif(filepath): dataset = gdal.Open(filepath) col = dataset.RasterXSize#图像长度 row = dataset.RasterYSize#图像宽度 geotrans = dataset.GetGeoTransform()#读取仿射变换 proj = dataset.GetProjection()#读取投影 data = dataset.ReadAsArray()#转为numpy格式 data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return [col, row, geotrans, proj, data] def read_tif02(file): data = gdalnumeric.LoadFile(file) data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan data[data <= -3] = np.nan return data def save_tif(data, file, output): ds = gdal.Open(file) shape = data.shape driver = gdal.GetDriverByName("GTiff") dataset = driver.Create(output, shape[1], shape[0], 1, gdal.GDT_Int16)#保存的数据类型 dataset.SetGeoTransform(ds.GetGeoTransform()) dataset.SetProjection(ds.GetProjection()) dataset.GetRasterBand(1).WriteArray(data) if __name__ == '__main__': file_path = r"D:\AAWORK\work\分段数据" file_out = r"D:\AAWORK\work\2021NDVISUM.tif" file_list = get_all_file_name(file_path) data_all = read_tif02(r"D:\AAWORK\work\研究方向\研究方向01作物分类内容\风云数据\MERSI-II植被指数旬产品(250M)\kongbai_zhu_250m.tif") for file in file_list: data = read_tif02(file) data_all = data_all+data save_tif(data_all,r"D:\AAWORK\work\研究方向\研究方向01作物分类内容\风云数据\MERSI-II植被指数旬产品(250M)\kongbai_zhu_250m.tif",file_out)
5 生成一个空白TIF(每个像元值为0的TIF)
思路比较简单,就是遍历每个像元,然后把每个像元的值设置为0,设置为其它可以,然后再进行保存。
from osgeo import gdal import numpy as np def read_tif(filepath): dataset = gdal.Open(filepath) col = dataset.RasterXSize#图像长度 row = dataset.RasterYSize#图像宽度 geotrans = dataset.GetGeoTransform()#读取仿射变换 proj = dataset.GetProjection()#读取投影 data = dataset.ReadAsArray()#转为numpy格式 data = data.astype(np.float32) # a = data[0][0] # data[data == a] = np.nan # data[data <= -3] = np.nan return [col, row, geotrans, proj, data] def save_tif(data, file, output): ds = gdal.Open(file) shape = data.shape driver = gdal.GetDriverByName("GTiff") dataset = driver.Create(output, shape[1], shape[0], 1, gdal.GDT_Int16)#保存的数据类型 dataset.SetGeoTransform(ds.GetGeoTransform()) dataset.SetProjection(ds.GetProjection()) dataset.GetRasterBand(1).WriteArray(data) if __name__ == '__main__': file_path = r"D:\AAWORK\work\2021NDVISUM.tif" file_out = r"D:\AAWORK\work\kongbai.tif" col, row, geotrans, proj, data = read_tif(file_path) for i in range(0,row): for j in range(0,col): data[i][j] = 0 save_tif(data,file_path,file_out)
以上就是Python实现分段读取和保存遥感数据的详细内容,更多关于Python遥感数据的资料请关注脚本之家其它相关文章!