
LangChain is a powerful framework that lets you combine a variety of language models and tools to build a translation system supporting more than 43 languages.
In this post, we will walk through how to build a professional translation system with LangChain.
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.llms import HuggingFaceEndpoint

# Configure a model per language
models = {
    "en": ChatOpenAI(model="gpt-4"),
    "ko": HuggingFaceEndpoint(repo_id="skt/ko-gpt-trinity-1.2B-v0.5"),
    "ja": HuggingFaceEndpoint(repo_id="rinna/japanese-gpt-neox-3.6b")
}
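Only the English model is wired into the chain below as the default engine; if you later want to route requests to the per-language models, a small lookup helper is enough (a sketch, assuming callers pass the same language codes used as keys above):

def select_model(lang_code):
    # Fall back to GPT-4 when no dedicated model is registered for the language
    return models.get(lang_code, models["en"])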
system_template = """
You are a professional translator with 20 years of experience.
Translate the following text from {source_lang} to {target_lang}:
{text}
Guidelines:
1. Maintain the original tone and style
2. Preserve technical terms
3. Adapt cultural context where needed
"""
prompt = ChatPromptTemplate.from_template(system_template)
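As a quick sanity check, you can render the template to inspect the exact message the model will receive (the sample values below are placeholders):

rendered = prompt.invoke({
    "source_lang": "English",
    "target_lang": "Korean",
    "text": "LangChain makes it easy to compose LLM pipelines."  # placeholder text
})
print(rendered.to_messages())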
from langchain_core.runnables import RunnableParallel
from langchain_core.output_parsers import StrOutputParser

translation_chain = (
    RunnableParallel({
        "source_lang": lambda x: detect_language(x["text"]),
        "target_lang": lambda x: x["target_lang"],
        "text": lambda x: x["text"]
    })
    | prompt
    | models["en"]  # default translation engine
    | StrOutputParser()
)
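With detect_language in place (it is defined in the next step), the chain takes the raw text and the desired target language; the call below is a minimal usage sketch with an illustrative input:

result = translation_chain.invoke({
    "text": "The weather is nice today.",  # illustrative input
    "target_lang": "Korean"
})
print(result)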
# Define the language detection function.
# HuggingFacePipeline does not support text classification, so we call the
# transformers pipeline directly and load it once instead of on every call.
from transformers import pipeline

lang_detector = pipeline("text-classification",
                         model="papluca/xlm-roberta-base-language-detection")

def detect_language(text):
    # A 500-character cap keeps detection fast without hurting accuracy
    return lang_detector(text[:500])[0]["label"]
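A quick check of the detector (the exact label format depends on the model, but this checkpoint returns ISO-style language codes):

print(detect_language("Bonjour tout le monde"))  # expected: something like "fr"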
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Custom terminology glossary
# (recent langchain-community versions require opting in to pickle deserialization)
term_base = FAISS.load_local("glossary_faiss", OpenAIEmbeddings(),
                             allow_dangerous_deserialization=True)

def customize_translation(doc):
    # Attach the three closest glossary entries as translation context
    context = term_base.similarity_search(doc["text"], k=3)
    return {"text": doc["text"], "context": context}
import streamlit as st

st.title("Real-Time Translator")
source_text = st.text_area("Input Text", height=200)
target_lang = st.selectbox("Target Language", ["한국어", "English", "日本語"])

if st.button("Translate"):
    result = translation_chain.invoke({
        "text": source_text,
        "target_lang": target_lang
    })
    st.markdown(f"**Result:**\n{result}")
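If the UI code above is saved as a standalone script (for example app.py, a name assumed here), it can be launched locally with "streamlit run app.py".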
from langchain_community.cache import SQLiteCache
from langchain.globals import set_llm_cache

# Cache identical translation requests in a local SQLite file
set_llm_cache(SQLiteCache(database_path=".langchain.db"))
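With the cache enabled, a repeated request with an identical prompt is answered from the SQLite file instead of calling GPT-4 again; a quick way to observe this (the input is a placeholder, and only the model call itself is cached, not the language detection step):

import time

payload = {"text": "Good morning", "target_lang": "Korean"}  # placeholder input
for attempt in range(2):
    start = time.time()
    translation_chain.invoke(payload)
    print(f"attempt {attempt + 1}: {time.time() - start:.2f}s")  # second run should be much faster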
from langchain.chains import TransformChain

# Batch translation: each item in "texts" must be a dict with "text" and "target_lang"
batch_translator = TransformChain(
    input_variables=["texts"],
    output_variables=["results"],
    transform=lambda inputs: {
        "results": translation_chain.batch(inputs["texts"])
    }
)
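A minimal usage sketch, where each entry carries its own text and target language (the two inputs are placeholders):

batch_result = batch_translator.invoke({
    "texts": [
        {"text": "Hello, world", "target_lang": "Korean"},
        {"text": "Thank you", "target_lang": "Japanese"}
    ]
})
print(batch_result["results"])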
from langsmith import Client
from langchain_core.tracers import LangChainTracer

# Send a trace of each translation to LangSmith for quality monitoring
client = Client()
tracer = LangChainTracer(client=client)
feedback_chain = translation_chain.with_config({"callbacks": [tracer]})
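Tracing also needs LangSmith credentials, which are normally supplied through environment variables (the key below is a placeholder and the project name is just an example):

import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "<your-langsmith-api-key>"  # placeholder
os.environ["LANGCHAIN_PROJECT"] = "translation-system"        # example project name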
# Translation quality metrics
# (calculate_bleu, calculate_ter and calculate_meteor are placeholder functions
# that still need to be implemented or imported)
translation_quality = {
    "BLEU Score": calculate_bleu,
    "TER": calculate_ter,
    "METEOR": calculate_meteor
}
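The metric functions themselves are not defined in this post; as one possible implementation, calculate_bleu could be built on the sacrebleu package. This is a sketch under that assumption, not the author's original code:

import sacrebleu

def calculate_bleu(hypotheses, references):
    # hypotheses: list of system translations; references: one reference per hypothesis
    return sacrebleu.corpus_bleu(hypotheses, [references]).score

# Illustrative call with placeholder sentences
print(calculate_bleu(["the cat sat on the mat"], ["the cat is sitting on the mat"]))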