c#读取excel数据的两种方法实现
作者:脸谱匠
方法一:OleDb: 用这种方法读取Excel速度还是非常的快的,但这种方式读取数据的时候不太灵活,不过可以在 DataTable 中对数据进行一些删减修改。
优点:读取方式简单、读取速度快
缺点:除了读取过程不太灵活之外,这种读取方式还有个弊端:当Excel数据量很大时,会非常占用内存,内存不够时会抛出内存溢出的异常。
不过一般情况下还是非常不错的。
(代码相较原文有所修改)
/// <summary>
/// Lets the user pick an Excel file and loads the worksheet named "Sheet1" into a
/// <see cref="DataTable"/> through an OLE DB connection.
/// </summary>
/// <param name="hasTitle">
/// When true the connection is opened with HDR=Yes (first row is treated as column titles);
/// otherwise HDR=NO is used.
/// </param>
/// <returns>
/// The first table filled by the adapter, or null when the dialog is cancelled, the chosen
/// file has no extension, or the query produced no table.
/// </returns>
DataTable GetDataFromExcelByConn(bool hasTitle = false)
{
    string filePath;

    // The dialog is a disposable component; release it as soon as the path is known.
    using (OpenFileDialog openFile = new OpenFileDialog())
    {
        openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
        openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
        openFile.Multiselect = false;
        if (openFile.ShowDialog() == DialogResult.Cancel)
        {
            return null;
        }

        filePath = openFile.FileName;
    }

    string fileType = System.IO.Path.GetExtension(filePath);
    if (string.IsNullOrEmpty(fileType))
    {
        return null;
    }

    // File extensions are not case-sensitive on Windows, so ".XLS" must count as ".xls".
    bool isXls = string.Equals(fileType, ".xls", StringComparison.OrdinalIgnoreCase);

    // There is no "Microsoft.Jet.OLEDB.12.0" provider: Jet 4.0 only reads the legacy .xls
    // format, while .xlsx needs the ACE 12.0 provider.
    string provider = isXls ? "Microsoft.Jet.OLEDB.4.0" : "Microsoft.ACE.OLEDB.12.0";

    string strCon = string.Format(
        "Provider={0};" + "Extended Properties=\"Excel {1}.0;HDR={2};IMEX=1;\";" + "data source={3};",
        provider,
        isXls ? 8 : 12,
        hasTitle ? "Yes" : "NO",
        filePath);
    string strCom = " SELECT * FROM [Sheet1$]";

    using (DataSet ds = new DataSet())
    {
        using (OleDbConnection myConn = new OleDbConnection(strCon))
        using (OleDbDataAdapter myCommand = new OleDbDataAdapter(strCom, myConn))
        {
            myConn.Open();
            myCommand.Fill(ds);
        }

        if (ds.Tables.Count <= 0)
        {
            return null;
        }

        return ds.Tables[0];
    }
}
方法二:Com组件的方式读取Excel
这种方式需要先引用 Microsoft.Office.Interop.Excel 。首先说下这种方式的优缺点
优点:可以非常灵活的读取Excel中的数据
缺点:如果是Web站点部署在IIS上,还需要服务器上已安装Excel,有时候还需要为IIS配置权限。最重要的一点是,因为是基于单元格方式读取的,所以读取数据很慢(曾做过试验,读取千行、200多列的文件,直接读取耗时15分钟;即使采用多线程分段读取来提高CPU的利用率,也需要8分钟。PS:CPU 为 I3)
需要读取大文件的童鞋们慎重。。。
(代码相较原文有所修改)
/// <summary>
/// Lets the user pick an Excel file and reads its first worksheet cell by cell through
/// Excel COM automation into a <see cref="DataTable"/> whose columns are all strings.
/// </summary>
/// <param name="hasTitle">
/// When true, row 1 supplies the column names and data starts at row 2; otherwise columns are
/// named "column0", "column1", ... and data starts at row 1.
/// </param>
/// <returns>
/// The populated table, or null when the dialog is cancelled, the first worksheet cannot be
/// obtained, or any exception is raised while reading.
/// </returns>
DataTable GetDataFromExcelByCom(bool hasTitle = false)
{
    string excelFilePath;
    using (OpenFileDialog openFile = new OpenFileDialog())
    {
        openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
        openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
        openFile.Multiselect = false;
        if (openFile.ShowDialog() == DialogResult.Cancel)
        {
            return null;
        }

        excelFilePath = openFile.FileName;
    }

    Excel.Application app = new Excel.Application();
    Excel.Sheets sheets;
    object oMissiong = System.Reflection.Missing.Value;
    Excel.Workbook workbook = null;
    DataTable dt = new DataTable();
    try
    {
        if (app == null)
        {
            return null;
        }

        workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);
        sheets = workbook.Worksheets;

        // Read the first worksheet into the DataTable.
        Excel.Worksheet worksheet = (Excel.Worksheet)sheets.get_Item(1);
        if (worksheet == null)
        {
            return null;
        }

        // Cells are addressed from A1 below, but UsedRange may start further down/right.
        // Use the absolute index of its last row/column so trailing data is not cut off.
        Excel.Range used = worksheet.UsedRange;
        int iRowCount = used.Row + used.Rows.Count - 1;
        int iColCount = used.Column + used.Columns.Count - 1;

        // Build the column headers.
        for (int i = 0; i < iColCount; i++)
        {
            var name = "column" + i;
            if (hasTitle)
            {
                var txt = ((Excel.Range)worksheet.Cells[1, i + 1]).Text.ToString();
                if (!string.IsNullOrWhiteSpace(txt))
                {
                    name = txt;
                }
            }

            // Adding a duplicate column name throws, so keep suffixing until it is unique.
            while (dt.Columns.Contains(name))
            {
                name = name + "_1";
            }

            dt.Columns.Add(new DataColumn(name, typeof(string)));
        }

        // Build the data rows.
        Excel.Range range;
        int rowIdx = hasTitle ? 2 : 1;
        for (int iRow = rowIdx; iRow <= iRowCount; iRow++)
        {
            DataRow dr = dt.NewRow();
            for (int iCol = 1; iCol <= iColCount; iCol++)
            {
                range = (Excel.Range)worksheet.Cells[iRow, iCol];
                dr[iCol - 1] = (range.Value2 == null) ? "" : range.Text.ToString();
            }

            dt.Rows.Add(dr);
        }

        return dt;
    }
    catch
    {
        // Best effort by design: any failure while reading is reported to the caller as null.
        return null;
    }
    finally
    {
        // workbook is still null when Workbooks.Open failed; dereferencing it here would throw
        // from the finally block and leave the Excel process running.
        if (workbook != null)
        {
            workbook.Close(false, oMissiong, oMissiong);
            System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
            workbook = null;
        }

        if (app != null)
        {
            app.Workbooks.Close();
            app.Quit();
            System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
            app = null;
        }
    }
}
原文的方法二还提供了多线程处理数据的代码,一并复制到此(此处出现了一个SheetOptions的类型,无法考证其来源,如果知晓,请留言,谢谢。):
/// <summary>
/// Reads the first worksheet of an Excel file through COM automation. Row 1 is always used
/// as the header. When the sheet has more than 500 data rows the work is split between four
/// worker threads and the calling thread.
/// </summary>
/// <param name="excelFilePath">Path of the workbook to open.</param>
/// <returns>
/// A table of string columns holding the data rows in worksheet order, or null when the
/// first worksheet cannot be obtained or any exception is raised while reading.
/// </returns>
public System.Data.DataTable ThreadReadExcel(string excelFilePath)
{
    Excel.Application app = new Excel.Application();
    Excel.Sheets sheets = null;
    Excel.Workbook workbook = null;
    object oMissiong = System.Reflection.Missing.Value;
    System.Data.DataTable dt = new System.Data.DataTable();
    try
    {
        if (app == null)
        {
            return null;
        }

        workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);

        sheets = workbook.Worksheets;
        Excel.Worksheet worksheet = (Excel.Worksheet)sheets.get_Item(1);
        if (worksheet == null)
        {
            return null;
        }

        // Cells are addressed from A1 below, but UsedRange may start further down/right.
        // Use the absolute index of its last row/column so trailing data is not cut off.
        Excel.Range used = worksheet.UsedRange;
        int iRowCount = used.Row + used.Rows.Count - 1;
        int iColCount = used.Column + used.Columns.Count - 1;

        // Header: one string column per worksheet column, named after the cell in row 1.
        for (int columnId = 1; columnId <= iColCount; columnId++)
        {
            Excel.Range headerCell = (Excel.Range)worksheet.Cells[1, columnId];
            string strNewColumnName = headerCell.Text.ToString().Trim();
            if (strNewColumnName.Length == 0)
            {
                strNewColumnName = "_1";
            }

            // A single pass over earlier columns can still end on a taken name
            // (e.g. "a_1", "a", "a"), so keep suffixing until the name is really free.
            while (dt.Columns.Contains(strNewColumnName))
            {
                strNewColumnName = strNewColumnName + "_1";
            }

            dt.Columns.Add(new DataColumn(strNewColumnName, typeof(string)));
        }

        // 500 or fewer data rows: read everything on the calling thread.
        if (iRowCount - 1 <= 500)
        {
            new SheetOptions(worksheet, iColCount, 2, iRowCount, dt).SheetToDataTable();
            return dt;
        }

        // More than 500 data rows: each worker takes a slice of b2 rows
        // (a tenth of the data); the calling thread reads whatever is left.
        const int workerCount = 4;
        int b2 = (iRowCount - 1) / 10;
        DataTable[] parts = new DataTable[workerCount];
        Thread[] workers = new Thread[workerCount];
        for (int k = 0; k < workerCount; k++)
        {
            parts[k] = dt.Clone();
            SheetOptions job = new SheetOptions(worksheet, iColCount, b2 * k + 2, b2 * (k + 1) + 1, parts[k]);
            workers[k] = new Thread(new ThreadStart(job.SheetToDataTable));
            workers[k].Start();
        }

        // Calling thread reads the remaining rows into dt.
        new SheetOptions(worksheet, iColCount, b2 * workerCount + 2, iRowCount, dt).SheetToDataTable();

        foreach (Thread worker in workers)
        {
            worker.Join();
        }

        // Merge in worksheet order: worker slices first, then the tail read by the calling
        // thread. Appending the tail before the later slices would scramble the row order.
        DataTable result = parts[0];
        for (int k = 1; k < workerCount; k++)
        {
            AppendRowsAndDispose(result, parts[k]);
        }

        AppendRowsAndDispose(result, dt);
        return result;
    }
    catch
    {
        // Best effort by design: any failure while reading is reported to the caller as null.
        return null;
    }
    finally
    {
        // workbook/sheets are still null when Workbooks.Open failed; guard every release so
        // the cleanup itself cannot throw and leave the Excel process running.
        if (workbook != null)
        {
            workbook.Close(false, oMissiong, oMissiong);
            System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
        }

        if (sheets != null)
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(sheets);
        }

        workbook = null;
        if (app != null)
        {
            app.Workbooks.Close();
            app.Quit();
            System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
            app = null;
        }

        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}

/// <summary>Copies every row of <paramref name="source"/> to the end of <paramref name="target"/>, then clears and disposes the source.</summary>
private static void AppendRowsAndDispose(System.Data.DataTable target, System.Data.DataTable source)
{
    foreach (DataRow row in source.Rows)
    {
        target.Rows.Add(row.ItemArray);
    }

    source.Clear();
    source.Dispose();
}
补充SheetOptions代码:
/// <summary>
/// Copies an inclusive range of worksheet rows into a DataTable as text. The work method takes
/// no arguments so it can be handed to a ThreadStart delegate.
/// </summary>
class SheetOptions
{
    private readonly Microsoft.Office.Interop.Excel.Worksheet _sheet;
    private readonly int _columnCount;
    private readonly int _firstRow;
    private readonly int _lastRow;
    private readonly System.Data.DataTable _table;

    /// <param name="worksheet">Worksheet to read from.</param>
    /// <param name="iColCount">Number of columns to read from each row, starting at column 1.</param>
    /// <param name="star">First worksheet row to read (1-based, inclusive).</param>
    /// <param name="end">Last worksheet row to read (inclusive).</param>
    /// <param name="dt">Table that receives one new row per worksheet row.</param>
    public SheetOptions(Microsoft.Office.Interop.Excel.Worksheet worksheet, int iColCount, int star, int end, System.Data.DataTable dt)
    {
        _sheet = worksheet;
        _columnCount = iColCount;
        _firstRow = star;
        _lastRow = end;
        _table = dt;
    }

    /// <summary>Appends one row to the target table for every worksheet row in the configured range.</summary>
    public void SheetToDataTable()
    {
        int rowNumber = _firstRow;
        while (rowNumber <= _lastRow)
        {
            System.Data.DataRow record = _table.NewRow();
            for (int columnIndex = 0; columnIndex < _columnCount; columnIndex++)
            {
                // Worksheet columns are 1-based, DataRow indexes are 0-based.
                Microsoft.Office.Interop.Excel.Range cell =
                    (Microsoft.Office.Interop.Excel.Range)_sheet.Cells[rowNumber, columnIndex + 1];

                // Cells without a value become the empty string; others use their displayed text.
                string text;
                if (cell.Value2 == null)
                {
                    text = "";
                }
                else
                {
                    text = cell.Text.ToString();
                }

                record[columnIndex] = text;
            }

            _table.Rows.Add(record);
            rowNumber++;
        }
    }
}
原文还提供了第三种方法,感兴趣的可以关心一下:
方法三:NPOI方式读取Excel,NPOI是一组开源的组件,类似Java的 POI。包括:NPOI、NPOI.HPSF、NPOI.HSSF、NPOI.HSSF.UserModel、NPOI.POIFS、NPOI.Util,下载的时候别只下一个噢
优点:读取Excel速度较快,读取方式灵活
缺点:只支持03的Excel,xlsx的无法读取。由于这点,使用这种方式的人不多啊,没理由要求客户使用03版Excel吧,再说03版Excel对于行数还有限制,只支持65536行。
(听他们的开发人员说会在2012年底推出新版,支持xlsx的读取。但一直很忙没时间去关注这个事情,有兴趣的同学可以瞧瞧去)
using System;
using System.Data;
using System.IO;
using System.Web;
using NPOI;
using NPOI.HPSF;
using NPOI.HSSF;
using NPOI.HSSF.UserModel;
using NPOI.POIFS;
using NPOI.Util;
using System.Text;
using System.Configuration;

/// <summary>
/// Helpers that write DataTable / DataSet content to .xls workbooks with NPOI (HSSF) and send
/// the result to the current HTTP response or to a file based on a template.
/// </summary>
public class NPOIHelper
{
    /// <summary>
    /// Largest number of data rows per sheet: the .xls format holds 65536 rows and every
    /// sheet written here spends one of them on the header.
    /// </summary>
    private const int DefaultExcelMaxRow = 65535;

    /// <summary>Maximum number of data rows written to a single sheet.</summary>
    private static readonly int ExcelMaxRow = ReadExcelMaxRow();

    /// <summary>
    /// Reads the "ExcelMaxRow" app setting. A missing, non-numeric or non-positive value falls
    /// back to the format limit; a value of 0 would otherwise make the split logic loop forever.
    /// </summary>
    private static int ReadExcelMaxRow()
    {
        int configured;
        string raw = ConfigurationManager.AppSettings["ExcelMaxRow"];
        if (int.TryParse(raw, out configured) && configured > 0)
        {
            return Math.Min(configured, DefaultExcelMaxRow);
        }

        return DefaultExcelMaxRow;
    }

    /// <summary>Adds a sheet named after the table and writes a header row followed by all data rows as text.</summary>
    private static void WriteTableToSheet(HSSFWorkbook workbook, DataTable table)
    {
        HSSFSheet sheet = (HSSFSheet)workbook.CreateSheet(table.TableName);

        // Header row: one cell per column, holding the column name.
        HSSFRow headerRow = (HSSFRow)sheet.CreateRow(0);
        foreach (DataColumn column in table.Columns)
        {
            headerRow.CreateCell(column.Ordinal).SetCellValue(column.ColumnName);
        }

        // Data rows start directly below the header.
        int rowIndex = 1;
        foreach (DataRow row in table.Rows)
        {
            HSSFRow dataRow = (HSSFRow)sheet.CreateRow(rowIndex);
            foreach (DataColumn column in table.Columns)
            {
                dataRow.CreateCell(column.Ordinal).SetCellValue(row[column].ToString());
            }

            rowIndex++;
        }
    }

    /// <summary>Serializes the workbook into a new in-memory stream positioned at its start.</summary>
    private static MemoryStream WriteWorkbook(HSSFWorkbook workbook)
    {
        MemoryStream ms = new MemoryStream();
        workbook.Write(ms);
        ms.Flush();
        ms.Position = 0;
        return ms;
    }

    /// <summary>Writes the stream to the current HTTP response as an attachment and completes the request.</summary>
    private static void SendAsAttachment(MemoryStream ms, string fileName)
    {
        HttpContext.Current.Response.AppendHeader("Content-Disposition", "attachment;filename=" + fileName);
        HttpContext.Current.Response.BinaryWrite(ms.ToArray());
        HttpContext.Current.ApplicationInstance.CompleteRequest();
    }

    /// <summary>
    /// Builds a workbook with one sheet per table of the DataSet.
    /// </summary>
    /// <param name="sourceDs">DataSet whose tables are exported.</param>
    /// <returns>Stream containing the workbook, positioned at its start.</returns>
    private static Stream ExportDataSetToExcel(DataSet sourceDs)
    {
        HSSFWorkbook workbook = new HSSFWorkbook();
        for (int i = 0; i < sourceDs.Tables.Count; i++)
        {
            WriteTableToSheet(workbook, sourceDs.Tables[i]);
        }

        return WriteWorkbook(workbook);
    }

    /// <summary>
    /// Exports every table of the DataSet to a workbook and sends it to the current HTTP
    /// response. Tables with more than <c>ExcelMaxRow</c> rows are replaced inside
    /// <paramref name="sourceDs"/> by several smaller tables before exporting.
    /// </summary>
    /// <param name="sourceDs">DataSet to export; it is modified when a table has to be split.</param>
    /// <param name="fileName">File name placed in the Content-Disposition header.</param>
    public static void ExportDataSetToExcel(DataSet sourceDs, string fileName)
    {
        // Replace each oversized table with its split parts (appended at the end of the set).
        for (int t = 0; t < sourceDs.Tables.Count; t++)
        {
            if (sourceDs.Tables[t].Rows.Count > ExcelMaxRow)
            {
                DataSet ds = GetdtGroup(sourceDs.Tables[t].Copy());
                sourceDs.Tables.RemoveAt(t);
                for (int g = 0; g < ds.Tables.Count; g++)
                {
                    DataTable dt = ds.Tables[g].Copy();
                    sourceDs.Tables.Add(dt);
                }

                // The table that was at t + 1 has moved to t; look at that index again.
                t--;
            }
        }

        using (MemoryStream ms = (MemoryStream)ExportDataSetToExcel(sourceDs))
        {
            SendAsAttachment(ms, fileName);
        }
    }

    /// <summary>
    /// Builds a workbook with a single sheet holding the table.
    /// </summary>
    /// <param name="sourceTable">Table to export.</param>
    /// <returns>Stream containing the workbook, positioned at its start.</returns>
    private static Stream ExportDataTableToExcel(DataTable sourceTable)
    {
        HSSFWorkbook workbook = new HSSFWorkbook();
        WriteTableToSheet(workbook, sourceTable);
        return WriteWorkbook(workbook);
    }

    /// <summary>
    /// Exports the table to a workbook and sends it to the current HTTP response. A table with
    /// more than <c>ExcelMaxRow</c> rows is split into several sheets first.
    /// </summary>
    /// <param name="sourceTable">Table to export.</param>
    /// <param name="fileName">File name placed in the Content-Disposition header.</param>
    public static void ExportDataTableToExcel(DataTable sourceTable, string fileName)
    {
        if (sourceTable.Rows.Count > ExcelMaxRow)
        {
            DataSet ds = GetdtGroup(sourceTable);
            ExportDataSetToExcel(ds, fileName);
            return;
        }

        using (MemoryStream ms = (MemoryStream)ExportDataTableToExcel(sourceTable))
        {
            SendAsAttachment(ms, fileName);
        }
    }

    /// <summary>
    /// Splits a table into a DataSet of tables that each hold at most <c>ExcelMaxRow</c> rows.
    /// The first table keeps the original name; the others are named "name_1", "name_2", ...
    /// </summary>
    /// <param name="dt">
    /// Table to split. A table that belongs to no DataSet is moved into the result and loses
    /// the rows that are distributed to the other tables; a table that already belongs to a
    /// DataSet is copied first and left untouched.
    /// </param>
    /// <returns>DataSet containing the split tables.</returns>
    public static DataSet GetdtGroup(DataTable dt)
    {
        // A table can belong to only one DataSet; adding an owned table would throw.
        if (dt.DataSet != null)
        {
            dt = dt.Copy();
        }

        string tablename = dt.TableName;
        DataSet ds = new DataSet();
        ds.Tables.Add(dt);

        // Create one empty overflow table for every started block beyond the first.
        double n = dt.Rows.Count / Convert.ToDouble(ExcelMaxRow);
        for (int i = 1; i < n; i++)
        {
            DataTable dtAdd = dt.Clone();
            dtAdd.TableName = tablename + "_" + i.ToString();
            ds.Tables.Add(dtAdd);
        }

        // Move rows beyond the limit out of the base table, filling the overflow tables in
        // order, until each overflow table is full or the base table is down to the limit.
        for (int i = 1; i < ds.Tables.Count; i++)
        {
            while (ds.Tables[i].Rows.Count < ExcelMaxRow && ds.Tables[0].Rows.Count > ExcelMaxRow)
            {
                ds.Tables[i].Rows.Add(ds.Tables[0].Rows[ExcelMaxRow].ItemArray);
                ds.Tables[0].Rows.RemoveAt(ExcelMaxRow);
            }
        }

        return ds;
    }

    /// <summary>
    /// Fills the "Sheet1" sheet of a template workbook with the table's data and saves the
    /// result as a new .xls file.
    /// </summary>
    /// <param name="sourceTable">Table whose rows are written below the template rows.</param>
    /// <param name="modelpath">Directory prefix; it is concatenated directly with the file names.</param>
    /// <param name="modelName">Template file name without the ".xls" extension.</param>
    /// <param name="fileName">Output file name without the ".xls" extension.</param>
    /// <param name="sheetName">Not used by the current implementation; "Sheet1" is always opened.</param>
    public static void ExportDataTableToExcelModel(DataTable sourceTable, string modelpath, string modelName, string fileName, string sheetName)
    {
        // NOTE(review): the index is incremented before each CreateRow, so the first data row is
        // written at row index 3 — confirm this matches the number of rows in the template.
        int rowIndex = 2;

        // using guarantees both file handles are released even when reading or writing throws.
        using (FileStream file = new FileStream(modelpath + modelName + ".xls", FileMode.Open, FileAccess.Read))
        {
            HSSFWorkbook hssfworkbook = new HSSFWorkbook(file);
            HSSFSheet sheet1 = (HSSFSheet)hssfworkbook.GetSheet("Sheet1");

            // Set the title cell.
            sheet1.GetRow(0).GetCell(0).SetCellValue("excelTitle");

            foreach (DataRow row in sourceTable.Rows)
            {
                rowIndex++;
                int colIndex = 0;
                HSSFRow xlsrow = (HSSFRow)sheet1.CreateRow(rowIndex);
                foreach (DataColumn col in sourceTable.Columns)
                {
                    xlsrow.CreateCell(colIndex).SetCellValue(row[col.ColumnName].ToString());
                    colIndex++;
                }
            }

            sheet1.ForceFormulaRecalculation = true;

            using (FileStream fileS = new FileStream(modelpath + fileName + ".xls", FileMode.Create))
            {
                hssfworkbook.Write(fileS);
            }
        }
    }
}
到此这篇关于c#读取excel数据的两种方法实现的文章就介绍到这了,更多相关c#读取excel内容请搜索脚本之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持脚本之家!