GitHub hospital-api
100,000건이 넘는 병원정보 csv 파일을 가져와 DB에 저장하는 프로젝트이다.
과정
1. csv 파일 불러와서 parsing
2. parsing한 데이터 DB insert
ReadLineContext.java
파일에서 한줄씩 읽어와서 parse 메서드를 통해 파싱을 한 결과를 result 리스트에 추가한다.
package com.springboot.hospitalsearchapi.parser;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class ReadLineContext<T> {
Parser<T> parser;
public ReadLineContext(Parser<T> parser) {
this.parser = parser;
}
public List<T> readByLine(String filename) throws IOException {
// 삽
List<T> result = new ArrayList<>();
BufferedReader reader = new BufferedReader(
new FileReader(filename)
);
String str;
while ((str = reader.readLine()) != null) {
try {
result.add(parser.parse(str));
}catch(Exception e){
System.out.printf("파싱 중 문제가 생겨 이 라인은 넘어갑니다. 파일내용 : %s\n", str.substring(0, 20));
}
}
reader.close();
return result;
}
}
Parser.java
Parser 인터페이스이다.
package com.springboot.hospitalsearchapi.parser;
public interface Parser<T> {
T parse(String str);
}
ParserFactory.java
package com.springboot.hospitalsearchapi.parser;
import com.springboot.hospitalsearchapi.domain.Hospital;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class ParserFactory {
@Bean
public ReadLineContext<Hospital> hospitalReadLineContextest(){
return new ReadLineContext<Hospital>(new HospitalParser());
}
}
HospitalParser.java
병원정보 한줄을 파싱하고 Hospital 객체를 생성해서 반환한다.
package com.springboot.hospitalsearchapi.parser;
import com.springboot.hospitalsearchapi.domain.Hospital;
import java.time.LocalDate;
import java.time.LocalDateTime;
public class HospitalParser implements Parser<Hospital>{
@Override
public Hospital parse(String str) {
String[] splitted = str.split("\",\"");
Hospital hospital = new Hospital();
hospital.setId(Integer.parseInt(splitted[0].replace("\"","")));
hospital.setOpenServiceName(splitted[1]);
hospital.setOpenLocalGovernmentCode(Integer.parseInt(splitted[3]));
hospital.setManagementNumber(splitted[4]);
int year=Integer.parseInt(splitted[5].substring(0, 4));
int month=Integer.parseInt(splitted[5].substring(4, 6));
int day=Integer.parseInt(splitted[5].substring(6, 8));
hospital.setLicenseDate(LocalDateTime.of(year, month, day, 0, 0, 0));
hospital.setBusinessStatus(Integer.parseInt(splitted[7]));
hospital.setBusinessStatusCode(Integer.parseInt(splitted[9]));
hospital.setPhone(splitted[15]);
hospital.setFullAddress(splitted[18]);
hospital.setRoadNameAddress(splitted[19]);
hospital.setHospitalName(splitted[21]);
hospital.setBusinessTypeName(splitted[25]);
hospital.setHealthcareProviderCount(Integer.parseInt(splitted[29]));
hospital.setPatientRoomCount(Integer.parseInt(splitted[30]));
hospital.setTotalNumberOfBeds(Integer.parseInt(splitted[31]));
hospital.setTotalAreaSize(Float.parseFloat(splitted[32]));
return hospital;
}
}
데이터가 십만건이상이라 많은 시간이 걸린다.
HospitalDao.java
JdbcTemplate를 이용해 DB에 Insert하고 Delete해주는 메서드를 구현했다.
package com.springboot.hospitalsearchapi.dao;
import com.springboot.hospitalsearchapi.domain.Hospital;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.stereotype.Component;
import javax.sql.DataSource;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
@Component
@Slf4j
public class HospitalDao {
private final DataSource dataSource;
private final JdbcTemplate jdbcTemplate;
public HospitalDao(DataSource dataSource, JdbcTemplate jdbcTemplate){
this.dataSource=dataSource;
this.jdbcTemplate=jdbcTemplate;
}
public void add(final Hospital hospital){
String sql = "Insert into nation_wide_hospitals values " +
"(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
this.jdbcTemplate.update(sql, hospital.getId(), hospital.getOpenServiceName(),
hospital.getOpenLocalGovernmentCode(), hospital.getManagementNumber(),
hospital.getLicenseDate(), hospital.getBusinessStatus(),
hospital.getBusinessStatusCode(), hospital.getPhone(),
hospital.getFullAddress(), hospital.getRoadNameAddress(),
hospital.getHospitalName(), hospital.getBusinessTypeName(),
hospital.getHealthcareProviderCount(), hospital.getPatientRoomCount(),
hospital.getTotalNumberOfBeds(), hospital.getTotalAreaSize());
}
public int deleteAll(){
return this.jdbcTemplate.update("delete from nation_wide_hospitals");
}
public int getCount(){
return this.jdbcTemplate.queryForObject(
"select count(*) from nation_wide_hospitals", Integer.class);
}
}
HospitalParserTest.java
데이터가 잘 들어가는지 Test하는 코드입니다.
package com.springboot.hospitalsearchapi.parser;
import com.springboot.hospitalsearchapi.dao.HospitalDao;
import com.springboot.hospitalsearchapi.domain.Hospital;
import com.springboot.hospitalsearchapi.service.HospitalService;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
@SpringBootTest
class HospitalParserTest {
@Autowired
ReadLineContext<Hospital> hospitalReadLineContext;
@Autowired
HospitalDao hospitalDao;
@Autowired
HospitalService hospitalService;
@Test
@DisplayName("병원정보 전체 add 잘되는지")
void addAllTest() throws IOException {
String filename="/Users/minji/Documents/likelion/file/fulldata_01_01_02_P_의원.csv";
hospitalDao.deleteAll();
int cnt=hospitalService.insertLargeVolumHospitalData(filename);
assertEquals(cnt, hospitalDao.getCount());
}
@Test
@DisplayName("Hospital deleteAll, getCount 잘 되는지")
void deleteAllTest(){
hospitalDao.deleteAll();
assertEquals(0, hospitalDao.getCount());
}
}
데이터가 다 들어가는데 한시간 걸리던 작업이 병렬처리를 하고 나서는 10분 이내로 끝난다.
HospitalService.java 추가
package com.springboot.hospitalsearchapi.service;
import com.springboot.hospitalsearchapi.dao.HospitalDao;
import com.springboot.hospitalsearchapi.domain.Hospital;
import com.springboot.hospitalsearchapi.parser.ReadLineContext;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
@Service
public class HospitalService {
private final ReadLineContext<Hospital> hospitalReadLineContext;
private final HospitalDao hospitalDao;
public HospitalService(ReadLineContext<Hospital> hospitalReadLineContext, HospitalDao hospitalDao) {
this.hospitalReadLineContext = hospitalReadLineContext;
this.hospitalDao = hospitalDao;
}
@Transactional
public int insertLargeVolumeHospitalData(String filename) {
List<Hospital> hospitalList;
try {
hospitalList = hospitalReadLineContext.readByLine(filename);
System.out.println("파싱이 끝났습니다.");
hospitalList.stream()
.parallel()
.forEach(hospital -> {
try {
this.hospitalDao.add(hospital); // db에 insert하는 구간
} catch (Exception e) {
System.out.printf("id:%d 레코드에 문제가 있습니다.\n",hospital.getId());
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
if (!Optional.of(hospitalList).isEmpty()) {
return hospitalList.size();
} else {
return 0;
}
}
}
HospitalParserTest.java 수정
package com.springboot.hospitalsearchapi.parser;
import com.springboot.hospitalsearchapi.dao.HospitalDao;
import com.springboot.hospitalsearchapi.domain.Hospital;
import com.springboot.hospitalsearchapi.service.HospitalService;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
@SpringBootTest
class HospitalParserTest {
@Autowired
ReadLineContext<Hospital> hospitalReadLineContext;
@Autowired
HospitalDao hospitalDao;
@Autowired
HospitalService hospitalService;
@Test
@DisplayName("병원정보 전체 add 잘되는지")
void addAllTest() throws IOException {
String filename="/Users/minji/Documents/likelion/file/fulldata_01_01_02_P_의원.csv";
hospitalDao.deleteAll();
int cnt=hospitalService.insertLargeVolumHospitalData(filename);
assertEquals(cnt, hospitalDao.getCount());
}
@Test
@DisplayName("Hospital deleteAll, getCount 잘 되는지")
void deleteAllTest(){
hospitalDao.deleteAll();
assertEquals(0, hospitalDao.getCount());
}
}