前言
最近要做一个excel表格上传并更新到数据库的功能,需要一次性上传100多万条数据。看了一些技术帖,最终实现的效果是上传100w数据的excel,耗时32秒。
以下是用到的技术工具:
EasyExcel:阿里巴巴开源的Excel处理工具,专门用于处理大文件,避免内存溢出。
批量插入:通过JdbcTemplate,每1000条数据插入一次,减少数据库交互。
多线程处理:使用SpringBoot线程池处,可以提高处理效率。
关于如何使用SpringBoot线程池可以参考这篇文章: SpringBoot线程池处理异步任务
一、背景
表格内容如下:
需要将表格中的板块信息保存到数据库,并生成区域字典。板块和区域的模型如下:
也就是将城市、行政区和街道信息更新到Region表中,编号、值和边界更新到Module表中。
二、依赖
<!--excel--> <dependency> <groupId>com.alibaba</groupId> <artifactId>easyexcel</artifactId> <version>3.0.2</version> </dependency>
三、生成一个100w数据的Excel文件
我们先通过EasyExcel生成一个100w数据的excel,用来做测试数据。
1、定义一个数据模型,RegionDetailInfo.class
@Getter @Setter public class RegionDetailInfo { @ExcelProperty("城市") private String city; @ExcelProperty("行政区") private String district; @ExcelProperty("街道") private String street; @ExcelProperty("编号") private String name; @ExcelProperty("值") private BigDecimal value; @ExcelProperty("边界") private String locations; public RegionDetailInfo(){} public RegionDetailInfo( String city,String district, String street, String name, BigDecimal value, String locations) { this.city = city; this.district = district; this.street = street; this.name = name; this.value= value; this.locations = locations; } }
2、实现导出功能
@Operation(summary = "下载测试excel") @GetMapping("/download") public void downloadExcel(HttpServletResponse response) throws IOException { response.setContentType("application/vnd.ms-excel"); response.setCharacterEncoding("utf-8"); String fileName = URLEncoder.encode("区域数据","UTF-8").replaceAll("\\+","%20"); response.setHeader("Content-disposition","attachment;filename*=utf-8''"+fileName+".xlsx"); List<RegionDetailInfo> regionList = generateRegionList(); EasyExcel.write(response.getOutputStream(),RegionDetailInfo.class) .sheet("区域信息") .doWrite(regionList); } public List<RegionDetailInfo> generateRegionList(){ String[] districts = new String[]{"徐汇区","闵行区","黄埔区","静安区","奉贤区","普陀区","宝山区","青浦区","崇明区","浦东新区"}; var values = new double[]{10.5,2.5,87.4,56.4,12.2,8.9,4.7,1.2,0.5,0.1}; var valueLength = values.length; Random random = new Random(); List<RegionDetailInfo> list = new ArrayList<>(); for (String district : districts){ for (int i = 1; i <= 100; i++){ for(int j = 1; j <= 1000; j++){ var index = random.nextInt(valueLength); var value = BigDecimal.valueOf(values[index]); var region = new RegionDetailInfo(district,district + i + "号街道","dd" + j,value,"[" + index + "," + i + "," + j + "]"); list.add(region); } } } return list; }
然后启动Spring Boot应用,访问/download端点,即可下载包含100w数据的Excel文件。
四、处理Excel表格逻辑
1、创建一个监听类,用于处理读取的数据
@Slf4j public class RegionFileListener extends AnalysisEventListener<RegionDetailInfo> { private final SyncRegionFileService syncRegionFileService; //板块 private List<ModuleInfo> moduleList = new ArrayList<>(); //城市 private Map<String, RegionInfo> cities = new HashMap<>(); //行政区 private Map<String,RegionInfo> districts = new HashMap<>(); //街道 private Map<String,RegionInfo> streets = new HashMap<>(); public RegionFileListener(SyncRegionFileService syncRegionFileService) { this.syncRegionFileService = syncRegionFileService; } @Override public void invoke(RegionDetailInfo regionDetailInfo, AnalysisContext analysisContext) { //将数据转成城市、行政区、街道和板块 convertRegion(regionDetailInfo); //每1000条数据插入一次 if(moduleList.size() >= 1000){ syncRegionFileService.saveModule(new ArrayList<>(this.moduleList)); //插入成功就清空list this.moduleList.clear(); } } @Override public void doAfterAllAnalysed(AnalysisContext analysisContext) { //excel读取完成后,保存剩余的module syncRegionFileService.saveModule(new ArrayList<>(this.moduleList)); //保存城市、行政区和街道 syncRegionFileService.saveRegion(new ArrayList<>(this.cities.values()),RegionType.City.getValue()); syncRegionFileService.saveRegion(new ArrayList<>(this.districts.values()),RegionType.District.getValue()); syncRegionFileService.saveRegion(new ArrayList<>(this.streets.values()),RegionType.Street.getValue()); } private void convertRegion(RegionDetailInfo regionDetailInfo) { var cityId = appendRegion(regionDetailInfo.getCity(),null, RegionType.City); var districtId = appendRegion(regionDetailInfo.getDistrict(),regionDetailInfo.getCity(), RegionType.District); var streetId = appendRegion(regionDetailInfo.getStreet(),regionDetailInfo.getDistrict(), RegionType.Street); moduleList.add(new ModuleInfo(nextIdStr(), cityId, districtId, streetId, regionDetailInfo.getName(), regionDetailInfo.getValue(), regionDetailInfo.getLocations())); } //添加到map,并生成ID private String appendRegion(String regionName,String parentRegionName, RegionType type){ switch (type){ case City: if(!cities.containsKey(regionName)){ this.cities.put(regionName,new RegionInfo(nextIdStr(),regionName,null)); } return cities.get(regionName).id; case District: if(!districts.containsKey(regionName)){ var parentId = cities.get(parentRegionName).id; this.districts.put(regionName,new RegionInfo(nextIdStr(),regionName,parentId)); } return districts.get(regionName).id; case Street: if(StringUtils.isNullOrEmpty(regionName)){ return null; } if(!streets.containsKey(regionName)){ var parentId = districts.get(parentRegionName).id; this.streets.put(regionName,new RegionInfo(nextIdStr(),regionName,parentId)); } return streets.get(regionName).id; default: return null; } } privat String nextIdStr(){ return UUID.randomUUID().toString(); } }
RegionType枚举
@AllArgsConstructor @Getter public enum RegionType { City("市"), District("区"), Street("街道"); private final String text; public String getValue() { return name(); } }
异步处理service,SyncRegionFileService.class
@Service @Slf4j @AllArgsConstructor public class SyncRegionFileService { private JdbcTemplate jdbcTemplate; private final String regionSql = "Insert into region_test Values"; private final String moduleSql = "insert into region_test_module Values"; //如果没有配置线程池,可以不加此注解 @Async("asyncServiceExecutor") public void saveModule(List<ModuleInfo> moduleList){ if(moduleList.isEmpty()){ return; } StringBuffer sql = new StringBuffer(moduleSql); for (ModuleInfo moduleInfo : moduleList){ var value = "('"+moduleInfo.id+"',NOW(),'admin',0,NOW(),'admin','"+moduleInfo.cityId+"','"+moduleInfo.directId+"',"+getColumnName(moduleInfo.streetId)+",'"+moduleInfo.name+"',"+moduleInfo.value+",'"+moduleInfo.location+"'),"; sql.append(value); } sql.deleteCharAt(sql.length() - 1); sql.append(";"); jdbcTemplate.execute(sql.toString()); log.info("板块批量保存成功! " ); } @Async("asyncServiceExecutor") public void saveRegion(List<RegionInfo> regionList,String type) { if(regionList.isEmpty()){ return; } StringBuffer sql = new StringBuffer(regionSql); for (RegionInfo regionInfo : regionList){ var value = "('"+regionInfo.id+"',NOW(),'admin',0,NOW(),'admin','"+regionInfo.name+"',"+getColumnName(regionInfo.parentId)+",'"+type+"'),"; sql.append(value); } sql.deleteCharAt(sql.length() - 1); jdbcTemplate.execute(sql.toString()); log.info("区域批量保存成功! " ); } private String getColumnName(String name){ return name == null ? "NULL" : "'"+name+"'"; } }
五、控制器读取Excel
private final SyncRegionFileService syncRegionFileService; @Operation(summary = "测试区域excel") @PostMapping(value = "/uploadTest",consumes = MediaType.MULTIPART_FORM_DATA_VALUE) public void uploadExcelTest(@RequestPart("file") MultipartFile file) throws IOException { EasyExcel.read(file.getInputStream(), RegionDetailInfo.class, new RegionFileListener(syncRegionFileService)) .sheet() .doRead(); }
六、测试
启动Spring Boot应用,访问上传端点。
最终接口用时30.93秒。
查看日志,大约32秒完成。
去数据库查询,100w条数据插入成功。