법령 리스트 조회 Code
import pandas as pd
import xml.etree.ElementTree as ET
from urllib.request import urlopen
from tqdm import trange
# Crawl the paginated law-list search endpoint and store every entry in
# ./cases.csv, one CSV row per law.

# Base request for the law-list search (XML output).
LAW_SEARCH_URL = "http://www.law.go.kr/DRF/lawSearch.do?OC=ngho1202&target=law&type=XML"
# Page size the pagination arithmetic relies on.
# NOTE(review): presumably the service default when no size is requested — confirm.
PAGE_SIZE = 20
# Child elements copied from each law entry, in CSV column order.
LAW_FIELDS = [
    '법령일련번호',
    '현행연혁코드',
    '법령명한글',
    '법령약칭명',
    '법령ID',
    '공포일자',
    '공포번호',
    '제개정구분명',
    '소관부처코드',
    '소관부처명',
    '법령구분명',
    '시행일자',
    '자법타법여부',
    '법령상세링크',
]


def _fetch_xml(request_url):
    """Download *request_url* and return the parsed XML root element."""
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(request_url) as reply:
        return ET.fromstring(reply.read())


def _extract_law_rows(root):
    """Return one dict per child of *root* that carries every LAW_FIELDS element.

    Children missing any of the fields are skipped; the root also holds
    non-entry children such as ``totalCnt``.
    """
    extracted = []
    for node in root:
        elements = [node.find(field) for field in LAW_FIELDS]
        # Compare against None explicitly: an Element without children is falsy.
        if any(element is None for element in elements):
            continue
        extracted.append(
            {field: element.text for field, element in zip(LAW_FIELDS, elements)}
        )
    return extracted


url = LAW_SEARCH_URL
xtree = _fetch_xml(url)
totalCnt = int(xtree.find('totalCnt').text)
# Ceiling division so a final, partially filled page is processed as well.
page_count = -(-totalCnt // PAGE_SIZE)

rows = []
for page in trange(1, page_count + 1):
    # Page 1 was already downloaded above to learn totalCnt.
    if page > 1:
        url = LAW_SEARCH_URL + "&page={}".format(page)
        xtree = _fetch_xml(url)
    rows.extend(_extract_law_rows(xtree))

cases = pd.DataFrame(rows)
cases.to_csv('./cases.csv', index=False)
법령 조문 조회 Code
import pandas as pd
import xml.etree.ElementTree as ET
from urllib.request import urlopen
from tqdm import trange
import re
# Index of laws to crawl, read from a CP949-encoded CSV on the local disk.
law_list = pd.read_csv(
    "C:\\Users\\user\\Desktop\\2022-1학기\\졸업작품\\APICrawling\\law_list.csv",
    encoding="cp949",
)

# Child element names to pull out of every article element.
contents = [
    '조문번호',
    '조문가지번호',
    '조문여부',
    '조문제목',
    '조문시행일자',
    '조문이동이전',
    '조문이동이후',
    '조문변경여부',
    '조문제개정유형',
    '조문내용',
    '조문참고자료',
    '항',
]

# Module-level accumulators: all records collected so far, and the one
# record currently being assembled.
rows = []
sub_dict = {}
def remove_tag(content):
    """Return *content* with every ``<...>`` tag removed.

    Matching is non-greedy and does not cross line breaks, so a tag that
    spans several lines is left in place.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', content)
# For every law in law_list: download its detail document as XML, flatten
# each article element into a dict, and write one CSV per law under ./법령별/.
for i in trange(len(law_list)):
    law = law_list.loc[i]

    # The stored detail link is rewritten from the HTML view to the XML view.
    link = law['법령상세링크'].replace('HTML', 'XML')
    url = "https://www.law.go.kr" + link
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(url) as reply:
        response = reply.read()
    xtree = ET.fromstring(response)

    # Start from a fresh list so each CSV holds only the current law's articles.
    rows = []
    # NOTE(review): assumes the second child of the root is the container of
    # article elements, each carrying a 조문키 attribute — confirm against the API.
    for article in xtree[1]:
        sub_dict = {'조문키': article.attrib["조문키"]}
        for content in contents:
            element = article.find(content)
            # Fields absent from this article are omitted from its record.
            if element is None:
                continue
            # Raw text is stored as-is; no tag stripping is applied here.
            sub_dict[content] = element.text
        rows.append(sub_dict)

    file_name = law['법령명한글']
    path_format = './법령별/{0}.csv'.format(file_name)
    each_law_content = pd.DataFrame(rows)
    each_law_content.to_csv(path_format, index=False)