서울-경기 지역의 카페 데이터를 효율적으로 처리하기 위해 공공데이터를 활용한 경험을 공유합니다. 데이터 정제와 효율적인 삽입 방식 개선을 통해 성능을 크게 향상시켰습니다.
public class CsvFileReader {
private final CafeRepository cafeRepository;
public String PATH = PATH_ROOT + "/cafe_list_seoul_202409.csv";
@Transactional
@EventListener(ApplicationReadyEvent.class)
public void readCsv() {
CSVReader reader = null;
try {
reader = new CSVReader(new FileReader(PATH));
String[] headers = reader.readNext(); // 헤더를 읽음
List<String[]> rows = reader.readAll();
List<Cafe> cafeList = new ArrayList<>();
for (int i = 0; i < rows.size(); i++) {
if ("카페".equals(rows.get(i)[8])) {
Cafe cafe = convertCsvToCafe(rows.get(i));
cafeList.add(cafe);
}
}
cafeRepository.saveAll(cafeList);
} catch (FileNotFoundException | CsvValidationException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
} catch (CsvException e) {
throw new RuntimeException(e);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
}
public Cafe convertCsvToCafe(String[] row) {
String name = ""; // [1] + [2]
String roadNamedAddress = ""; //[31] + [30] + [35] + [36]
String zipCode = ""; // [33]
double longitude = 0; // [37]
double latitude = 0; // [38]
for (int i = 1; i < row.length; i++) {
name = row[1] + " " + row[2];
roadNamedAddress = row[31] + " " + row[30];
zipCode = row[33];
longitude = Double.valueOf(row[37]);
latitude = Double.valueOf(row[38]);
}
return Cafe.builder()
.name(name)
.roadNamedAddress(roadNamedAddress)
.zipCode(zipCode)
.longitude(longitude)
.latitude(latitude)
.build();
}
}
@Transactional
public void rewriteCSV() {
try {
CSVReader reader = new CSVReader(new FileReader(PATH));
CSVWriter writer = new CSVWriter(new FileWriter(PATH_ROOT + "/cafe_list_seoul.csv"));
String[] headers = {"이름", "도로명주소", "우편번호", "경도", "위도"};
writer.writeNext(headers);
List<String[]> cafeInfos = rewriteCafeInfos(reader);
writer.writeAll(cafeInfos);
writer.close();
reader.close();
} catch (IOException | CsvException e) {
log.error(e.getMessage());
throw new GlobalException(ErrorCode.CSV_REFACTOR_ERROR);
}
}
/*
* 원본 CSV 파일에서 필요 데이터만 추출 및 정제하는 메서드
* 필요 데이터(이름, 도로명 주소, 우편 번호, 경도, 위도
*/
private List<String[]> rewriteCafeInfos(CSVReader reader) throws IOException, CsvException {
return reader.readAll().stream()
.filter(row -> "카페".equals(row[8]))
.map(row -> new String[]{
row[1] + " " + row[2],
row[30] + " " + row[31],
row[33],
row[37],
row[38]
})
.toList();
}
cafeRepository.saveAll(cafeList);
@Repository
@RequiredArgsConstructor
public class CafeJdbcRepository {
private final JdbcTemplate jdbcTemplate;
@Transactional
public void bulkInsert(List<Cafe> cafeList) {
String sql = "insert into Cafe(name, road_named_address, zip_code,longitude,latitude) values(?,?,?,?,?)";
jdbcTemplate.batchUpdate(sql, new BatchPreparedStatementSetter() {
@Override
public void setValues(PreparedStatement ps, int i) throws SQLException {
Cafe cafe = cafeList.get(i);
ps.setString(1, cafe.getName());
ps.setString(2, cafe.getRoadNamedAddress());
ps.setString(3, cafe.getZipCode());
ps.setDouble(4, cafe.getLongitude());
ps.setDouble(5, cafe.getLatitude());
}
@Override
public int getBatchSize() {
return cafeList.size();
}
});
}
}
------------------------
cafeJdbcRepository.bulkInsert(cafeList);


