format_fix_ver.1
{ Status: What went wrong
Symptoms: Logs/Errors/Manifestations
Cause: Why it happened (including estimation → verification process)
Resolution: Actions I took
Preventing recurrence: What to do next time }
ModuleNotFoundError: No module named 'config' > python scripts/rule_filter.py Traceback (most recent call last): File "/Users/ky/.pyenv/versions/3.11.6/lib/python3.11/site-packages/pandas/compat/_optional.py", line 135, in import_optional_dependency module = importlib.import_module(name) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/Users/ky/.pyenv/versions/3.11.6/lib/python3.11/importlib/__init__.py", line 126, in import_module return _bootstrap._gcd_import(name[level:], package, level) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "<frozen importlib._bootstrap>", line 1204, in _gcd_import File "<frozen importlib._bootstrap>", line 1176, in _find_and_load File "<frozen importlib._bootstrap>", line 1140, in _find_and_load_unlocked ModuleNotFoundError: No module named 'openpyxl' During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/Users/ky/Applications/AssiPER/scripts/rule_filter.py", line 73, in <module> main("inputs/nationwide.xlsx", "outputs/flagged_rows.xlsx") File "/Users/ky/Applications/AssiPER/scripts/rule_filter.py", line 46, in main df = pd.read_excel(input_xlsx) ^^^^^^^^^^^^^^^^^^^^^^^^^ File "/Users/ky/.pyenv/versions/3.11.6/lib/python3.11/site-packages/pandas/io/excel/_base.py", line 495, in read_excel io = ExcelFile( ^^^^^^^^^^ File "/Users/ky/.pyenv/versions/3.11.6/lib/python3.11/site-packages/pandas/io/excel/_base.py", line 1567, in __init__ self._reader = self._engines[engine]( ^^^^^^^^^^^^^^^^^^^^^^ File "/Users/ky/.pyenv/versions/3.11.6/lib/python3.11/site-packages/pandas/io/excel/_openpyxl.py", line 552, in __init__ import_optional_dependency("openpyxl") File "/Users/ky/.pyenv/versions/3.11.6/lib/python3.11/site-packages/pandas/compat/_optional.py", line 138, in import_optional_dependency raise ImportError(msg)
> python -m scripts.rule_fliter /Users/ky/Applications/AssiPER/.venv/bin/python: No module named scripts.rule_fliter > python -m scripts.rule_fliter /Users/ky/Applications/AssiPER/.venv/bin/python: No module named scripts.rule_fliter
flags:12116
python -m scripts.rule_filter [OK] flagged rows saved -> outputs/flagged_rows.xlsx
- total rows: 14337
- flagged rows: 6679
> python -m scripts.apply_to_original [OK] saved -> outputs/nationwide_updated.xlsx
- rows updated: 0
조건을 바꿨는데 0일리 없다
err7 UnboundLocalError
UnboundLocalError: cannot access local variable 'd_marked' where it is not associated with a valuedef main(): wb = load_workbook(ORIGINAL_XLSX) ws = wb.active max_row = ws.max_row filled = 0 skipped = 0 for r in range(2, max_row + 1): # 1행은 헤더라고 가정 d = s(ws[f"{COL_D}{r}"].value) e = s(ws[f"{COL_E}{r}"].value) g = s(ws[f"{COL_G}{r}"].value) h = s(ws[f"{COL_H}{r}"].value) # ✅ E가 비었거나 너무 짧고, H가 충분히 길면 -> H를 E에 복사 if len(e) < MIN_E_LEN and len(h) >= MIN_H_LEN: # 법령명(G)도 같이 넣고 싶으면 prefix로 붙이기 (선택) e_new = f"[{g}]\n{h}" if g else h #e_new = h ws[f"{COL_E}{r}"].value = e_new filled += 1 if d == "": ws[f"{COL_D}{r}"].value = "AI조항복붙" d_marked += 1 else: skipped += 1 wb.save(OUTPUT_XLSX) print(f"[OK] saved -> {OUTPUT_XLSX}") print(f"- E filled from H: {filled}") print(f"- skipped: {skipped}") if __name__ == "__main__":
python -m scripts.apply_flagged_to_autofilled [OK] saved -> outputs/nationwide_final.xlsx - rows updated: 0 > python -m scripts.rule_filter [OK] flagged rows saved -> outputs/flagged_rows_after_autofill.xlsx - total rows: 14337 - flagged rows: 24
err 9 : script 가 못잡아낸 공백
err 10 : "AI조항없음" 을 "이상없음"으로 바꿀예정
err 11: 행밀림 현상
e1 : config 위치 (최상단 부모 프로젝트 하단) + module화위해 script init.py 추가
e2 : typo error
e3 : flag 전수조사각이라 너무 많아서 규칙추가
e4 : zip -r peri.zip . -x "*.git*" -x "*.xlsx*" -x "*.pyc" -x "*.venv*"
e6 : column 변수명 불일치 (엑셀 간 한글 변수명)
e7 : scope 변수명 이슈 (최상단 d_marked=0 초기화 선언.)
e8: 로직 이해 미스.
e9
2) D 이상없음 E 공백
D : “이상없음” = E: “(규제 조항) 해당 없음 / 조례 없음 / 기준 미설정이라서 검수 결과 문제 없음”
- E index 추천 : "해당없음","조례없음","기준미비"
3) D도 공백이고 E도 공백인 행이 3개
셀에 “눈에 안 보이는 공백”이 들어간 경우
4) D열 값이 “이상 없음”처럼 띄어쓰기 버전인 경우
t = t.replace("이상 없음", "이상없음")
e9, 10: rule 추가("AI조항" -삭제 후 추가)
F_NO_BASE = {"조례없음", "기준미설정", "규정없음"}
...(중략)
def main():
wb = load_workbook(INPUT_XLSX)
ws = wb.active
# 삭제
removed = 0
for r in range(2, ws.max_row + 1):
cell = ws[f"{COL_D}{r}"]
if s(cell.value) == "AI조항복붙":
cell.value = ""
removed += 1
wb.save(OUTPUT_XLSX)
# 추가
filled = 0
for r in range(2, ws.max_row + 1):
d = s(ws[f"{COL_D}{r}"].value)
e = s(ws[f"{COL_E}{r}"].value)
f = s(ws[f"{COL_F}{r}"].value)
# D가 이상없음(또는 이상 없음)이고 E가 빈칸인데,
# F가 조례없음/기준미설정/규정없음이면 → E 채우기
if d.replace(" ", "") == "이상없음" and e == "" and f in F_NO_BASE:
ws[f"{COL_E}{r}"].value = "해당 없음"
filled += 1
python -m scripts.rule_filter [OK] flagged rows saved -> outputs/flagged_rows.xlsx
- total rows: 14337
- flagged rows: 1150
> python -m scripts.autofill_e_from_h [OK] saved -> outputs/nationwide_autofilled.xlsx
- E filled from H: 9647
- skipped: 4690
python -m scripts.autofill_e_from_h [OK] saved -> outputs/nationwide_autofilled.xlsx
- E filled from H: 9647
- skipped: 4690
> python -m scripts.rule_filter [OK] flagged rows saved -> outputs/flagged_rows_after_autofill.xlsx
- total rows: 14337
- flagged rows: 24
#...ㄷㄷ 엄청나다
-> 사람손으로 채워야하는수 24개
-> fin
> python -m scripts.apply_flagged_to_autofilled [OK] saved -> outputs/nationwide_final.xlsx - rows updated: 0
> python -m scripts.apply_flagged_to_autofilled [OK] saved -> outputs/nationwide_final.xlsx - rows updated: 24
> python -m scripts.rule_filter [OK] flagged rows saved -> outputs/flagged_rows_final.xlsx - total rows: 14337
a = pd.read_excel(a_path, engine="openpyxl")
b = pd.read_excel(b_path, engine="openpyxl")
a = _norm_df(a)
b = _norm_df(b)
a_keys = _key_set(a)
b_keys = _key_set(b)
cnt_a = a.groupby("항목명").size().reset_index(name="rows_a")
cnt_b = b.groupby("항목명").size().reset_index(name="rows_b")
per_item = cnt_a.merge(cnt_b, on="항목명", how="outer").fillna(0)
per_item["diff(a-b)"] = per_item["rows_a"].astype(int) - per_item["rows_b"].astype(int)
per_item = per_item.sort_values(by="diff(a-b)", key=lambda s: s.abs(), ascending=False)
missing_in_b = pd.DataFrame(list(a_keys - b_keys), columns=KEY_COLS).sort_values(KEY_COLS)
missing_in_a = pd.DataFrame(list(b_keys - a_keys), columns=KEY_COLS).sort_values(KEY_COLS)
# Ordering mismatches (도시명 sequence) per 항목명
mismatches = []
common_items = sorted(set(a["항목명"]) & set(b["항목명"]))
for item in common_items:
la = a.loc[a["항목명"]==item, "도시명"].tolist()
lb = b.loc[b["항목명"]==item, "도시명"].tolist()
if len(la) != len(lb):
mismatches.append([item, "len_diff", len(la), len(lb), None, None])
continue
for i,(ca,cb) in enumerate(zip(la,lb)):
if ca != cb:
try:
j = lb.index(ca)
except ValueError:
j = None
mismatches.append([item, "order_mismatch", len(la), len(lb), i, j])
break
order_df = pd.DataFrame(mismatches, columns=["항목명","type","rows_a","rows_b","first_mismatch_idx","idx_in_b_of_a_city"])
-> 결과값
> python -m scripts.debug_compare \
--a "전수조사취합_20260115_noErr.xlsx" \
--b "nationwide_final_copy.xlsx" \
--out "debug_out"
[OK] wrote reports -> debug_out
- missing_in_b: 0
- missing_in_a: 1
- items with order mismatch: 0
- per_item_count rows: 60