Map 대신 String에 담아 출력하도록 코드 수정
- Map 형식으로 데이터를 출력하니 데이터의 순서가 뒤죽박죽이 됨
- String에 데이터를 각각 담아 보여줘도 일단 가능!
- DB에 데이터를 어떻게 담을지는 이미지 URL 크롤링 후 천천히 생각해 보기로 함
이미지를 제외한 데이터만 크롤링하는 코드
/**
 * Crawls Naver Map cafe search results and prints each cafe's text details
 * (name, address, phone number, business hours, menu, facilities) to standard output.
 *
 * <p>Image URLs are not collected by this class.
 */
public class WebCrawler7 {
    private WebDriver driver;
    private String url;
    public static String WEB_DRIVER_ID = "webdriver.chrome.driver";
    public static String WEB_DRIVER_PATH = "C:/work/chromedriver.exe";

    /**
     * Opens Naver Map in Chrome, searches for {@code location + " 서구 카페"}, clicks every
     * listed result and prints its details.
     *
     * <p>Address, phone number, business hours, menu and facilities are read on a best-effort
     * basis: a field that cannot be read is printed as {@code null}. The browser session is
     * always closed before this method returns, including when the crawl fails part-way.
     *
     * @param location text placed in front of the fixed search suffix
     */
    public void crawlMap(String location) {
        System.setProperty(WEB_DRIVER_ID, WEB_DRIVER_PATH);
        ChromeOptions options = new ChromeOptions();
        // NOTE(review): this capability name looks Internet Explorer specific - confirm Chrome needs it.
        options.setCapability("ignoreProtectedModeSettings", true);
        driver = new ChromeDriver(options);
        try {
            crawlSearchResults(location);
        } finally {
            // Without this every run leaves a Chrome window and a chromedriver process behind.
            driver.quit();
        }
    }

    /** Runs the search and walks the result list; returns early if the thread is interrupted. */
    private void crawlSearchResults(String location) {
        url = "https://map.naver.com/v5/";
        driver.get(url);
        if (!pause(5000)) {
            return;
        }

        WebElement inputSearch = driver.findElement(By.className("input_search"));
        String inputKey = " 서구 카페";
        inputSearch.sendKeys(location + inputKey);
        inputSearch.sendKeys(Keys.ENTER);
        if (!pause(3000)) {
            return;
        }

        driver.switchTo().frame("searchIframe");
        WebElement scrollBox = driver.findElement(By.id("_pcmap_list_scroll_container"));
        for (int i = 0; i < 6; i++) {
            // Scroll the list container's own content to the bottom. scrollIntoView() on the
            // container only scrolls its ancestors, so it never moved the list itself.
            ((JavascriptExecutor) driver)
                    .executeScript("arguments[0].scrollTop = arguments[0].scrollHeight;", scrollBox);
            if (!pause(2000)) {
                return;
            }
        }

        List<WebElement> elements = driver.findElements(By.className("TYaxT"));
        for (WebElement e : elements) {
            e.click();
            String key = e.getText();
            if (!pause(2000)) {
                return;
            }

            // The detail panel is a sibling iframe of the result list.
            driver.switchTo().parentFrame();
            driver.switchTo().frame(driver.findElement(By.id("entryIframe")));

            String address = readText("LDgIH");
            String phoneNumber = readText("xlx7Q");
            String businessHours = readBusinessHours();
            String menuInfo = readMenuInfo();
            String facilities = readText("xPvPE");

            System.out.println("Name: " + key);
            System.out.println("Address: " + address);
            System.out.println("Phone Number: " + phoneNumber);
            System.out.println("Business Hours: " + businessHours);
            System.out.println("Menu Info: " + menuInfo);
            System.out.println("Facilities: " + facilities);

            // Go back to the result list so the next element can be clicked.
            driver.switchTo().parentFrame();
            driver.switchTo().frame("searchIframe");
        }
    }

    /**
     * Sleeps for the given time.
     *
     * @return {@code true} if the full pause elapsed, {@code false} if the thread was interrupted
     *     (the interrupt flag is restored so callers further up can see it)
     */
    private static boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return true;
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Returns the text of the first element with the given class, or {@code null} if it cannot be read. */
    private String readText(String className) {
        try {
            return driver.findElement(By.className(className)).getText();
        } catch (RuntimeException ex) {
            // Best-effort field: a missing or unreadable element must not abort the crawl.
            return null;
        }
    }

    /**
     * Clicks the business-hours toggle and joins the day/time rows as {@code "day time; "}.
     *
     * @return the joined rows, or {@code null} if the toggle or rows cannot be read
     */
    private String readBusinessHours() {
        try {
            driver.findElement(By.className("RMgN0")).click();
            List<WebElement> dayElements = driver.findElements(By.xpath("//span[@class='i8cJw']"));
            List<WebElement> timeElements = driver.findElements(By.xpath("//div[@class='H3ua4']"));
            // Pair only the rows present in both lists so a count mismatch keeps the readable rows.
            int rows = Math.min(dayElements.size(), timeElements.size());
            StringBuilder businessHoursBuilder = new StringBuilder();
            for (int j = 0; j < rows; j++) {
                businessHoursBuilder
                        .append(dayElements.get(j).getText())
                        .append(" ")
                        .append(timeElements.get(j).getText())
                        .append("; ");
            }
            return businessHoursBuilder.toString();
        } catch (RuntimeException ex) {
            return null;
        }
    }

    /**
     * Joins menu names and prices as {@code "name:price;"}.
     *
     * @return the joined entries (empty when no menu is listed), or {@code null} if reading fails
     */
    private String readMenuInfo() {
        try {
            List<WebElement> menuEles = driver.findElements(By.className("VQvNX"));
            List<WebElement> priceEles = driver.findElements(By.className("gl2cc"));
            // Menu items without a matching price element used to discard the whole menu.
            int rows = Math.min(menuEles.size(), priceEles.size());
            StringBuilder menuInfoBuilder = new StringBuilder();
            for (int i = 0; i < rows; i++) {
                menuInfoBuilder
                        .append(menuEles.get(i).getText())
                        .append(":")
                        .append(priceEles.get(i).getText())
                        .append(";");
            }
            return menuInfoBuilder.toString();
        } catch (RuntimeException ex) {
            return null;
        }
    }

    public static void main(String[] args) {
        WebCrawler7 crawler = new WebCrawler7();
        crawler.crawlMap("대전");
    }
}
카페 이미지 크롤링 코드
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Keys;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.interactions.Actions;
/**
 * Crawls Naver Map cafe search results and prints each cafe's image URLs followed by its
 * text details (name, address, phone number, business hours, menu, facilities) to standard output.
 */
public class WebCrawler13 {
    private WebDriver driver;
    private String url;
    public static String WEB_DRIVER_ID = "webdriver.chrome.driver";
    public static String WEB_DRIVER_PATH = "C:/work/chromedriver.exe";

    /**
     * Opens Naver Map in Chrome, searches for {@code location + " 동구 카페"}, clicks every
     * listed result and prints its image URLs and details.
     *
     * <p>Address, phone number, business hours, menu and facilities are read on a best-effort
     * basis: a field that cannot be read is printed as {@code null}. The browser session is
     * always closed before this method returns, including when the crawl fails part-way.
     *
     * @param location text placed in front of the fixed search suffix
     */
    public void crawlMap(String location) {
        System.setProperty(WEB_DRIVER_ID, WEB_DRIVER_PATH);
        ChromeOptions options = new ChromeOptions();
        // NOTE(review): this capability name looks Internet Explorer specific - confirm Chrome needs it.
        options.setCapability("ignoreProtectedModeSettings", true);
        driver = new ChromeDriver(options);
        try {
            crawlSearchResults(location);
        } finally {
            // Without this every run leaves a Chrome window and a chromedriver process behind.
            driver.quit();
        }
    }

    /** Runs the search and walks the result list; returns early if the thread is interrupted. */
    private void crawlSearchResults(String location) {
        url = "https://map.naver.com/v5/";
        driver.get(url);
        if (!pause(5000)) {
            return;
        }

        WebElement inputSearch = driver.findElement(By.className("input_search"));
        String inputKey = " 동구 카페";
        inputSearch.sendKeys(location + inputKey);
        inputSearch.sendKeys(Keys.ENTER);
        if (!pause(3000)) {
            return;
        }

        driver.switchTo().frame("searchIframe");
        WebElement scrollBox = driver.findElement(By.id("_pcmap_list_scroll_container"));
        for (int i = 0; i < 6; i++) {
            // Scroll the list container's own content to the bottom. scrollIntoView() on the
            // container only scrolls its ancestors, so it never moved the list itself.
            ((JavascriptExecutor) driver)
                    .executeScript("arguments[0].scrollTop = arguments[0].scrollHeight;", scrollBox);
            if (!pause(3000)) {
                return;
            }
        }

        List<WebElement> elements = driver.findElements(By.className("TYaxT"));
        for (WebElement e : elements) {
            e.click();
            String key = e.getText();
            if (!pause(2000)) {
                return;
            }

            // The detail panel is a sibling iframe of the result list.
            driver.switchTo().parentFrame();
            driver.switchTo().frame(driver.findElement(By.id("entryIframe")));

            String address = readText("LDgIH");
            String phoneNumber = readText("xlx7Q");
            String businessHours = readBusinessHours();
            String menuInfo = readMenuInfo();
            String facilities = readText("xPvPE");

            // Images are rendered as CSS backgrounds, so the URL lives in the inline style.
            List<WebElement> imageElements = driver.findElements(By.cssSelector("div.K0PDV._div"));
            for (WebElement imageElement : imageElements) {
                String styleAttribute = imageElement.getAttribute("style");
                String imageUrl = extractImageUrlFromStyleAttribute(styleAttribute);
                System.out.println("Image URL: " + imageUrl);
            }

            System.out.println("Name: " + key);
            System.out.println("Address: " + address);
            System.out.println("Phone Number: " + phoneNumber);
            System.out.println("Business Hours: " + businessHours);
            System.out.println("Menu Info: " + menuInfo);
            System.out.println("Facilities: " + facilities);

            // Go back to the result list so the next element can be clicked.
            driver.switchTo().parentFrame();
            driver.switchTo().frame("searchIframe");
        }
    }

    /**
     * Sleeps for the given time.
     *
     * @return {@code true} if the full pause elapsed, {@code false} if the thread was interrupted
     *     (the interrupt flag is restored so callers further up can see it)
     */
    private static boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return true;
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Returns the text of the first element with the given class, or {@code null} if it cannot be read. */
    private String readText(String className) {
        try {
            return driver.findElement(By.className(className)).getText();
        } catch (RuntimeException ex) {
            // Best-effort field: a missing or unreadable element must not abort the crawl.
            return null;
        }
    }

    /**
     * Clicks the business-hours toggle and joins the day/time rows as {@code "day time; "}.
     *
     * @return the joined rows, or {@code null} if the toggle or rows cannot be read
     */
    private String readBusinessHours() {
        try {
            driver.findElement(By.className("RMgN0")).click();
            List<WebElement> dayElements = driver.findElements(By.xpath("//span[@class='i8cJw']"));
            List<WebElement> timeElements = driver.findElements(By.xpath("//div[@class='H3ua4']"));
            // Pair only the rows present in both lists so a count mismatch keeps the readable rows.
            int rows = Math.min(dayElements.size(), timeElements.size());
            StringBuilder businessHoursBuilder = new StringBuilder();
            for (int j = 0; j < rows; j++) {
                businessHoursBuilder
                        .append(dayElements.get(j).getText())
                        .append(" ")
                        .append(timeElements.get(j).getText())
                        .append("; ");
            }
            return businessHoursBuilder.toString();
        } catch (RuntimeException ex) {
            return null;
        }
    }

    /**
     * Joins menu names and prices as {@code "name:price;"}.
     *
     * @return the joined entries (empty when no menu is listed), or {@code null} if reading fails
     */
    private String readMenuInfo() {
        try {
            List<WebElement> menuEles = driver.findElements(By.className("VQvNX"));
            List<WebElement> priceEles = driver.findElements(By.className("gl2cc"));
            // Menu items without a matching price element used to discard the whole menu.
            int rows = Math.min(menuEles.size(), priceEles.size());
            StringBuilder menuInfoBuilder = new StringBuilder();
            for (int i = 0; i < rows; i++) {
                menuInfoBuilder
                        .append(menuEles.get(i).getText())
                        .append(":")
                        .append(priceEles.get(i).getText())
                        .append(";");
            }
            return menuInfoBuilder.toString();
        } catch (RuntimeException ex) {
            return null;
        }
    }

    /**
     * Extracts the URL from a {@code background-image: url(...)} declaration in an inline style.
     *
     * @param styleAttribute value of an element's {@code style} attribute; may be {@code null}
     * @return the URL without surrounding quotes, or an empty string when the attribute is
     *     {@code null}, has no {@code background-image: url(} declaration, or the declaration
     *     has no closing parenthesis
     */
    private String extractImageUrlFromStyleAttribute(String styleAttribute) {
        final String marker = "background-image: url(";
        if (styleAttribute == null) {
            return "";
        }
        // Anchor on the background-image declaration itself; an earlier property may also
        // contain "url(".
        int markerIndex = styleAttribute.indexOf(marker);
        if (markerIndex < 0) {
            return "";
        }
        int startIndex = markerIndex + marker.length();
        int endIndex = styleAttribute.indexOf(")", startIndex);
        if (endIndex < 0) {
            // Unterminated url( - substring() would throw on the -1 index.
            return "";
        }
        return styleAttribute.substring(startIndex, endIndex)
                .replace("'", "")
                .replace("\"", "")
                .trim();
    }

    public static void main(String[] args) {
        WebCrawler13 crawler = new WebCrawler13();
        crawler.crawlMap("대전");
    }
}