[LangChain] Query To SQL

Woong·2025년 2월 21일

langChain

Python / Machine Learning

목록 보기

25/27

아키텍처

question를 query 로 변환 -> query 실행 -> 쿼리 결과를 answer 로 가공

LLM 모델 선정 (공통)

azure 의 경우

import getpass
import os

if not os.environ.get("AZURE_OPENAI_API_KEY"):
  os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass("Enter API key for Azure: ")

from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)

DB connection 예제

SQLDatabase 로 db connection 관리 + query 생성에 활용할 db 스키마 활용
- dialect 는 DB 종속성 (SQLite 인지 Mysql 인지 이런거)
- get_usable_table_names 는 테이블명 목록

from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM Artist LIMIT 10;")

출력 샘플

sqlite
['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']

"[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains'), (6, 'Antônio Carlos Jobim'), (7, 'Apocalyptica'), (8, 'Audioslave'), (9, 'BackBeat'), (10, 'Billy Cobham')]"

query 모델

prompt hub 에서 모델 풀링

from langchain import hub

query_prompt_template = hub.pull("langchain-ai/sql-query-system-prompt")

LangGraph State 정의

애플리케이션 input/output, 단계별 중간 데이터를 langgraph state 로 정의

from typing_extensions import TypedDict


class State(TypedDict):
    question: str
    query: str
    result: str
    answer: str

question -> query 변환

dialect 로 어떤 DB 인지 종속성, get_table_info 로 db 테이블 스키마 전달

from typing_extensions import Annotated


class QueryOutput(TypedDict):
    """Generated SQL query."""

    query: Annotated[str, ..., "Syntactically valid SQL query."]


def write_query(state: State):
    """Generate SQL query to fetch information."""
    prompt = query_prompt_template.invoke(
        {
            "dialect": db.dialect,
            "top_k": 10,
            "table_info": db.get_table_info(),
            "input": state["question"],
        }
    )
    structured_llm = llm.with_structured_output(QueryOutput)
    result = structured_llm.invoke(prompt)
    return {"query": result["query"]}

ex) question 으로 쿼리 작성

write_query({"question": "How many Employees are there?"})

{'query': 'SELECT COUNT(EmployeeId) AS EmployeeCount FROM Employee;'}

쿼리 실행

자동화된 쿼리를 실행해도 좋은지 검토하고, DB 권한 최소화할것.
- 운영자 승인하에 실행하도록 하는 것도 좋음
langchain_community tool 로 쿼리 실행
- https://python.langchain.com/api_reference/community/tools/langchain_community.tools.sql_database.tool.QuerySQLDatabaseTool.html

from langchain_community.tools.sql_database.tool import QuerySQLDatabaseTool


def execute_query(state: State):
    """Execute SQL query."""
    execute_query_tool = QuerySQLDatabaseTool(db=db)
    return {"result": execute_query_tool.invoke(state["query"])}

answer 생성

def generate_answer(state: State):
    """Answer question using retrieved information as context."""
    prompt = (
        "Given the following user question, corresponding SQL query, "
        "and SQL result, answer the user question.\n\n"
        f'Question: {state["question"]}\n'
        f'SQL Query: {state["query"]}\n'
        f'SQL Result: {state["result"]}'
    )
    response = llm.invoke(prompt)
    return {"answer": response.content}

graph object 로 빌드

sequence 등록하여 graph 로 빌드

from langgraph.graph import START, StateGraph

graph_builder = StateGraph(State).add_sequence(
    [write_query, execute_query, generate_answer]
)
graph_builder.add_edge(START, "write_query")
graph = graph_builder.compile()