chunk - 데이터를 덩어리 단위로 처리할 때, 각 커밋 사이에 처리되는 row 수
Step은 Tasklet 단위로 처리되며, Tasklet 중 ChunkOrientedTasklet을 통해 Chunk를 처리한다. 이를 구성하는 3요소는 ItemReader, ItemProcessor, ItemWriter이다.
JPA를 사용하여 구현
아래 코드는 Book의 kdc 정보를 update하는 배치 Job을 작성한 것입니다.
/**
 * JPA entity describing a book; the ISBN is the primary key.
 *
 * <p>Getters and a no-args constructor are generated by Lombok. State is set through the
 * builder-backed constructor, and afterwards only the KDC can be changed via
 * {@link #updateKdc(String)}.
 */
@Entity
@Getter
@NoArgsConstructor
public class Book {

    /** Primary key, stored in column {@code ISBN}. */
    @Id
    @Column(name = "ISBN")
    private String isbn;

    /** Stored in column {@code BOOK_NAME}. */
    @Column(name = "BOOK_NAME")
    private String bookName;

    /** Stored in column {@code AUTHOR}. */
    @Column(name = "AUTHOR")
    private String author;

    /** Stored in column {@code PUBLISHER}. */
    @Column(name = "PUBLISHER")
    private String publisher;

    /** Stored in column {@code contents} (lower-case, unlike the other column names). */
    @Column(name = "contents")
    private String contents;

    /** KDC value, stored in column {@code KDC}. */
    @Column(name = "KDC")
    private String kdc;

    /** Stored in column {@code CATEGORY}. */
    @Column(name = "CATEGORY")
    private String category;

    /** Stored in column {@code KEYWORD}. */
    @Column(name = "KEYWORD")
    private String keyword;

    /** Stored in column {@code BOOK_IMAGE}. */
    @Column(name = "BOOK_IMAGE")
    private String img;

    /** Reviews of this book; mapped by {@code BookReview.book}, all operations cascade. */
    @OneToMany(mappedBy = "book", cascade = CascadeType.ALL)
    private List<BookReview> bookReviewList = new ArrayList<>();

    /**
     * Creates a book with every scalar attribute supplied; the review list starts empty.
     * Lombok generates the builder from this constructor.
     */
    @Builder
    public Book(String isbn, String bookName, String author, String publisher,
                String kdc, String category, String keyword, String img, String contents) {
        // Identity first, then descriptive attributes, then classification.
        this.isbn = isbn;
        this.bookName = bookName;
        this.author = author;
        this.publisher = publisher;
        this.contents = contents;
        this.img = img;
        this.kdc = kdc;
        this.category = category;
        this.keyword = keyword;
    }

    /**
     * Replaces the KDC value of this book.
     *
     * @param kdc the new KDC value
     * @author hyunho
     * @since 2021/08/23
     */
    public void updateKdc(String kdc) {
        this.kdc = kdc;
    }
}
/**
 * Spring Batch configuration for {@code updateBookJob}.
 *
 * <p>The single chunk-oriented step reads every {@link Book} whose {@code kdc} is null, fetches
 * the search page at {@code LIBRARY_OF_KOREA_BASE_URL + isbn}, extracts the KDC from that page
 * and writes the updated entity back through JPA.
 */
@Slf4j
@Configuration
@RequiredArgsConstructor
public class UpdateBookConfiguration {

    private static final String JOB_NAME = "updateBookJob";

    /** Search URL prefix; the book's ISBN is appended as the {@code kwd} value. */
    private static final String LIBRARY_OF_KOREA_BASE_URL = "https://www.nl.go.kr/NL/contents/search.do?pageNum=1&pageSize=30&srchTarget=total&kwd=";

    /** CSS selector of the element whose text carries the KDC on the search result page. */
    private static final String KDC_SELECTOR = "#sub_content > div.content_wrap > div > div.integSearch_wrap > div.search_cont_wrap > div > div > div.search_right_section > div.section_cont_wrap > div:nth-child(1) > div.cont_list.list_type > div.row > span:nth-child(10)";

    /** Used both as the chunk (commit) size and as the reader's page size. */
    private static final int CHUNK_SIZE = 10;

    private final JobBuilderFactory jobBuilderFactory;
    private final StepBuilderFactory stepBuilderFactory;
    private final EntityManagerFactory entityManagerFactory;

    /**
     * Defines the job, which consists of the single step {@link #jpaBookItemReaderStep()}.
     *
     * @return the {@code updateBookJob} job
     */
    @Bean
    public Job updateBookJob() {
        return jobBuilderFactory.get(JOB_NAME)
                .start(jpaBookItemReaderStep())
                .build();
    }

    /**
     * Defines the chunk-oriented step: read books, look up their KDC, write them back.
     *
     * @return the configured step
     */
    @Bean
    public Step jpaBookItemReaderStep() {
        return stepBuilderFactory.get("jpaBookItemReaderStep")
                .<Book, Book>chunk(CHUNK_SIZE)
                .reader(jpaBookItemReader())
                .processor(updateBook())
                .writer(jpaPagingItemWriter())
                .build();
    }

    /**
     * Paging reader that selects the books whose KDC is still null.
     *
     * <p>NOTE(review): the processor fills in {@code kdc}, so written rows stop matching this
     * query while the reader keeps advancing its page number. That combination may cause some
     * rows to be skipped within a single run; confirm against the Spring Batch version in use.
     *
     * @return the configured reader
     */
    @Bean
    public JpaPagingItemReader<Book> jpaBookItemReader() {
        return new JpaPagingItemReaderBuilder<Book>()
                .name("jpaBookItemReader")
                .entityManagerFactory(entityManagerFactory)
                .pageSize(CHUNK_SIZE)
                .queryString("select b from Book b where b.kdc is null")
                .build();
    }

    /**
     * Processor that looks up the KDC of one book and stores it on the entity.
     *
     * <p>When the page contains no parsable KDC the item is filtered out by returning
     * {@code null}, so one unparsable page no longer fails the step with an
     * {@link ArrayIndexOutOfBoundsException}. I/O errors raised while fetching the page still
     * propagate.
     *
     * @return the processor
     */
    @Bean
    public ItemProcessor<Book, Book> updateBook() {
        return item -> {
            log.info("updateBook() :: book list = {}", item.getBookName());
            String url = LIBRARY_OF_KOREA_BASE_URL + item.getIsbn();
            Document doc = Jsoup.connect(url).get();
            String kdc = extractKdc(doc.select(KDC_SELECTOR).text());
            if (kdc == null) {
                // Returning null drops the item from the chunk instead of writing it.
                log.warn("updateBook() :: kdc not found for isbn = {}", item.getIsbn());
                return null;
            }
            log.info("updateBook() :: kdc is = {}", kdc);
            item.updateKdc(kdc);
            return item;
        };
    }

    /**
     * JPA writer for the processed books.
     *
     * @return the configured writer
     */
    @Bean
    @StepScope
    public JpaItemWriter<Book> jpaPagingItemWriter() {
        JpaItemWriter<Book> jpaItemWriter = new JpaItemWriter<>();
        jpaItemWriter.setEntityManagerFactory(entityManagerFactory);
        return jpaItemWriter;
    }

    /**
     * Extracts the KDC from text shaped like {@code label:value-suffix}: spaces are removed,
     * then the part after the first {@code ':'} and before the first {@code '-'} is taken.
     *
     * @param rawText text of the element selected by {@code KDC_SELECTOR}; may be empty
     * @return the KDC, or {@code null} when the text has no {@code ':'} part or the value is empty
     */
    private static String extractKdc(String rawText) {
        String[] labelAndValue = rawText.replace(" ", "").split(":");
        if (labelAndValue.length < 2) {
            return null;
        }
        // A limit of 2 keeps a leading empty token, so a value of just "-" cannot yield an empty array.
        String kdc = labelAndValue[1].split("-", 2)[0];
        return kdc.isEmpty() ? null : kdc;
    }
}
RepositoryItemReader를 사용.