import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Command-line experiment that replaces inline (data-URI) images in an HTML file
 * with generated upload paths.
 *
 * <p>The program reads an HTML file, finds every {@code <img>} whose {@code src} is a
 * {@code data:image/...} URI, rewrites that {@code src} to
 * {@code /upload/20210311/<timestamp>_<n>.<extension>}, prints the resulting
 * {@code <img>} elements and writes the modified document to an output file.
 *
 * <p>NOTE(review): the embedded image payload itself is not written anywhere; only the
 * {@code src} attribute is rewritten. The previous version decoded the payload into a
 * byte array that was never used, so that step has been dropped.
 */
public class HtmlParsingTest {

    /** Input file used when no command-line argument is given. */
    private static final String DEFAULT_INPUT = "htmlForParsing.html";

    /** Output file used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT = "test111.html";

    /** Path prefix placed in front of every generated image file name. */
    private static final String UPLOAD_PATH = "/upload/20210311/";

    /** Prefix that identifies an inline image data URI. */
    private static final String DATA_IMAGE_PREFIX = "data:image/";

    /**
     * Rewrites inline images of the input HTML and stores the result.
     *
     * @param args optional; {@code args[0]} overrides the input file and {@code args[1]}
     *             the output file. With no arguments the original fixed file names are used.
     * @throws Exception if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File input = new File(args != null && args.length > 0 ? args[0] : DEFAULT_INPUT);
        File output = new File(args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT);

        // Read the file in one go with an explicit charset. Reading line by line and
        // appending without separators glued adjacent lines together, which can merge
        // tokens that were split across lines (for example "<img" and "src=...").
        // A read failure now propagates instead of producing output from empty input.
        String content = FileUtils.readFileToString(input, StandardCharsets.UTF_8);
        Document doc = Jsoup.parse(content);

        // One timestamp per run plus a running number keeps the generated names unique;
        // a per-image timestamp with second resolution gave every image the same name.
        String timestamp = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
        int sequence = 0;

        Elements imgs = doc.getElementsByTag("img");
        for (Element img : imgs) {
            String extension = imageExtension(img.attr("src").trim());
            if (extension == null) {
                // Not an inline image, or a malformed data URI: leave it untouched.
                continue;
            }
            sequence++;
            img.attr("src", UPLOAD_PATH + timestamp + "_" + sequence + "." + extension);
        }

        // The elements were modified in place, so this prints the rewritten tags.
        System.out.println(imgs.toString());
        FileUtils.writeStringToFile(output, doc.outerHtml(), StandardCharsets.UTF_8);
    }

    /**
     * Extracts the image subtype of a data URI for use as a file extension.
     *
     * @param src the (trimmed) value of an {@code src} attribute
     * @return the text between {@code data:image/} and the following {@code ;} or
     *         {@code ,} (for example {@code png}), or {@code null} if {@code src} does not
     *         start with {@code data:image/}, the subtype is empty or contains characters
     *         other than ASCII letters, digits, {@code +}, {@code -} and {@code .}, or
     *         there is no non-empty payload after a comma
     */
    private static String imageExtension(String src) {
        if (!src.startsWith(DATA_IMAGE_PREFIX)) {
            return null;
        }
        int start = DATA_IMAGE_PREFIX.length();
        int end = start;
        while (end < src.length() && isSubtypeChar(src.charAt(end))) {
            end++;
        }
        if (end == start || end == src.length()) {
            return null;
        }
        char terminator = src.charAt(end);
        if (terminator != ';' && terminator != ',') {
            // Unexpected character (such as a path separator) inside the subtype.
            return null;
        }
        int comma = src.indexOf(',', end);
        if (comma < 0 || comma == src.length() - 1) {
            // No payload follows the header.
            return null;
        }
        return src.substring(start, end);
    }

    /** Returns whether {@code c} is accepted inside an image subtype / file extension. */
    private static boolean isSubtypeChar(char c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '+' || c == '-' || c == '.';
    }
}