[LangChain] Query To SQL

Woong·2025년 2월 21일
0

Python / Machine Learning

목록 보기
25/27
post-thumbnail

아키텍처

  • question를 query 로 변환 -> query 실행 -> 쿼리 결과를 answer 로 가공

LLM 모델 선정 (공통)

  • azure 의 경우
import getpass
import os

if not os.environ.get("AZURE_OPENAI_API_KEY"):
  os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass("Enter API key for Azure: ")

from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)

DB connection 예제

  • SQLDatabase 로 db connection 관리 + query 생성에 활용할 db 스키마 활용
    • dialect 는 DB 종속성 (SQLite 인지 Mysql 인지 이런거)
    • get_usable_table_names 는 테이블명 목록
from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM Artist LIMIT 10;")
출력 샘플
sqlite
['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']
"[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains'), (6, 'Antônio Carlos Jobim'), (7, 'Apocalyptica'), (8, 'Audioslave'), (9, 'BackBeat'), (10, 'Billy Cobham')]"

query 모델

from langchain import hub

query_prompt_template = hub.pull("langchain-ai/sql-query-system-prompt")

LangGraph State 정의

  • 애플리케이션 input/output, 단계별 중간 데이터를 langgraph state 로 정의
from typing_extensions import TypedDict


class State(TypedDict):
    question: str
    query: str
    result: str
    answer: str

question -> query 변환

  • dialect 로 어떤 DB 인지 종속성, get_table_info 로 db 테이블 스키마 전달
from typing_extensions import Annotated


class QueryOutput(TypedDict):
    """Generated SQL query."""

    query: Annotated[str, ..., "Syntactically valid SQL query."]


def write_query(state: State):
    """Generate SQL query to fetch information."""
    prompt = query_prompt_template.invoke(
        {
            "dialect": db.dialect,
            "top_k": 10,
            "table_info": db.get_table_info(),
            "input": state["question"],
        }
    )
    structured_llm = llm.with_structured_output(QueryOutput)
    result = structured_llm.invoke(prompt)
    return {"query": result["query"]}
  • ex) question 으로 쿼리 작성
write_query({"question": "How many Employees are there?"})
{'query': 'SELECT COUNT(EmployeeId) AS EmployeeCount FROM Employee;'}

쿼리 실행

from langchain_community.tools.sql_database.tool import QuerySQLDatabaseTool


def execute_query(state: State):
    """Execute SQL query."""
    execute_query_tool = QuerySQLDatabaseTool(db=db)
    return {"result": execute_query_tool.invoke(state["query"])}

answer 생성

def generate_answer(state: State):
    """Answer question using retrieved information as context."""
    prompt = (
        "Given the following user question, corresponding SQL query, "
        "and SQL result, answer the user question.\n\n"
        f'Question: {state["question"]}\n'
        f'SQL Query: {state["query"]}\n'
        f'SQL Result: {state["result"]}'
    )
    response = llm.invoke(prompt)
    return {"answer": response.content}

graph object 로 빌드

  • sequence 등록하여 graph 로 빌드
from langgraph.graph import START, StateGraph

graph_builder = StateGraph(State).add_sequence(
    [write_query, execute_query, generate_answer]
)
graph_builder.add_edge(START, "write_query")
graph = graph_builder.compile()

reference

0개의 댓글

관련 채용 정보