1,의원,01_01_02_P,3620000,PHMA119993620020041100004,19990612,,1,영업/정상,13,영업중,,,,,062-515-2875,,500881,광주광역시 북구 풍향동 565번지 4호 3층,"광주광역시 북구 동문대로 24, 3층 (풍향동)",61205,효치과의원,2.02111E+13,U,2021.11.17 2:40,치과의원,192630.7351,185314.6176,치과의원,1,0,0,52.29,401,치과,,,,0,0,,,0,
public Hospital parse(String str) {
String[] row = str.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)", -1);
try{
int id = Integer.parseInt(row[0]);
String service = row[1];
int localCode = Integer.parseInt(row[3]);
String manageNum = row[4];
LocalDateTime licenseDate = strConverterToDate(row[5]);
int businessStatus = Integer.parseInt(row[7]);
int businessCode = Integer.parseInt(row[9]);
String phone = row[15];
String address = row[18].replace("\"","");
String roadAddress = row[19].replace("\"","");
...
(?=)
(전방 탐색, lookahead) : 작성한 패턴에 일치하는 영역이 존재하여도 그 값은 결과에서 제외되어 나오는 패턴.
split()
: limit(두 번째 파라미터)를 음수로 주면 끝에 있는 빈 문자열까지 포함하여 모든 구분값을 나눠서 배열로 반환한다.
출처 : https://hashcode.co.kr/questions/948/문자열을-콤마를-기준으로-분리하는데-안에-있는-콤마는-무시하게-할수없나요
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demo: removes the commas and the surrounding double quotes from every
 * quoted field of a comma-separated line, printing the line before and after.
 */
public class Test {

    /**
     * Prints the sample line, each quoted field as it is found, the cleaned
     * line, and finally the untouched original line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String realData = "가,abc,100,\"1,000\",\"400,000,000\",RXT100";
        // Lazily matches one double-quoted field, quotes included.
        final Pattern quotedField = Pattern.compile("\"(.*?)\"");

        System.out.println("수정전 : " + realData);

        String cleaned = realData;
        // Each pass rewrites the first remaining quoted field, then a fresh
        // matcher is built over the updated text to look for the next one.
        for (Matcher m = quotedField.matcher(cleaned); m.find(); m = quotedField.matcher(cleaned)) {
            String hit = m.group();
            System.out.println(hit);
            String flattened = hit.replace(",", "").replace("\"", "");
            cleaned = m.replaceFirst(flattened);
        }

        System.out.println("수정후 : " + cleaned);
        System.out.println("원본 : " + realData);
    }
}
출처 : https://mohading.tistory.com/35
//gradle
implementation 'com.opencsv:opencsv:5.5'
//maven
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>5.5.2</version>
</dependency>
package com.practice.hospitalapi.parser;
import com.opencsv.CSVReader;
import com.opencsv.exceptions.CsvValidationException;
import com.practice.hospitalapi.domain.Hospital;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.time.DateTimeException;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads a hospital CSV file with opencsv and maps each data row to a
 * {@link Hospital}.
 */
public class CsvParser {

    /**
     * Parses every data row of the given CSV file into a {@link Hospital}.
     *
     * <p>The first record is treated as the header and skipped. Reading stops
     * at end of input; the {@code null} end-of-file marker returned by
     * {@code readNext()} is never turned into a {@code Hospital}. If reading
     * fails part-way, a message is printed and the rows parsed so far are
     * returned.
     *
     * @param filename path of the CSV file to read
     * @return the hospitals in file order; never {@code null}
     * @throws FileNotFoundException if the file cannot be opened
     */
    public List<Hospital> read(String filename) throws FileNotFoundException {
        List<Hospital> hospitalList = new ArrayList<>();
        // Opened outside the try block so that FileNotFoundException still
        // propagates to the caller instead of being caught as an IOException.
        FileInputStream input = new FileInputStream(filename);
        // NOTE(review): the reader uses the platform default charset; if the
        // file is stored in another encoding the text will be garbled.
        try (CSVReader csvReader = new CSVReader(new InputStreamReader(input))) {
            csvReader.readNext(); // skip the header row
            String[] row;
            while ((row = csvReader.readNext()) != null) {
                hospitalList.add(toHospital(row));
            }
        } catch (IOException | CsvValidationException e) {
            System.out.println("파싱을 실패했습니다.");
        }
        return hospitalList;
    }

    /**
     * Maps one CSV record to a {@link Hospital}, best effort: when a column is
     * missing or malformed, a message is printed and the fields not yet
     * assigned keep their defaults ({@code 0} / {@code null}).
     */
    private Hospital toHospital(String[] row) {
        int id = 0;
        String service = null;
        int localCode = 0;
        String manageNum = null;
        LocalDateTime licenseDate = null;
        int businessStatus = 0;
        int businessCode = 0;
        String phone = null;
        String address = null;
        String roadAddress = null;
        String name = null;
        String businessType = null;
        int providerNum = 0;
        int roomNum = 0;
        int bedNum = 0;
        float area = 0;
        try {
            id = Integer.parseInt(row[0]);
            service = row[1];
            localCode = Integer.parseInt(row[3]);
            manageNum = row[4];
            licenseDate = strConverterToDate(row[5]);
            businessStatus = Integer.parseInt(row[7]);
            businessCode = Integer.parseInt(row[9]);
            phone = row[15];
            address = row[18];
            roadAddress = row[19];
            name = row[21];
            businessType = row[25];
            providerNum = Integer.parseInt(row[29]);
            roomNum = Integer.parseInt(row[30]);
            bedNum = Integer.parseInt(row[31]);
            area = Float.parseFloat(row[32].replace("\"", ""));
        } catch (NumberFormatException | DateTimeException | IndexOutOfBoundsException
                 | NullPointerException e) {
            // IndexOutOfBoundsException covers both a record with too few
            // columns and a date string shorter than eight characters, so a
            // single bad row no longer aborts the whole file.
            System.out.println("빈 값이 있습니다.");
        }
        return new Hospital(id, service, localCode, manageNum, licenseDate, businessStatus, businessCode,
                phone, address, roadAddress, name, businessType, providerNum, roomNum, bedNum, area);
    }

    /**
     * Converts a string whose first eight characters are {@code yyyyMMdd} into
     * a {@link LocalDateTime} at midnight of that day. Characters after the
     * eighth are ignored.
     *
     * @param str date text such as {@code "19990612"}
     * @return the date at 00:00:00
     * @throws NumberFormatException if the year, month or day part is not a number
     * @throws IndexOutOfBoundsException if {@code str} has fewer than eight characters
     * @throws DateTimeException if the parts do not form a valid date
     */
    public LocalDateTime strConverterToDate(String str) {
        int year = Integer.parseInt(str.substring(0, 4));
        int month = Integer.parseInt(str.substring(4, 6));
        int day = Integer.parseInt(str.substring(6, 8));
        return LocalDateTime.of(year, month, day, 0, 0, 0);
    }
}
// Wrap the stream returned by url.openStream() in a buffered reader that
// decodes the bytes as UTF-8, with the charset given by name.
BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"));
// Or: the same reader, with the charset passed as the StandardCharsets.UTF_8
// constant instead of a name string.
BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), StandardCharsets.UTF_8));
CSV 파일을 별도로 다시 인코딩하지 않더라도, 파일이 EUC-KR로 저장되어 있다면 위 코드의 문자셋 자리에 "EUC-KR"을 지정하여 한글 깨짐을 방지할 수 있다.