cd /home/ubuntu
sudo apt update
sudo apt install build-essential -y
wget https://artifacts.opensearch.org/releases/bundle/opensearch/2.4.0/opensearch-2.4.0-linux-x64.tar.gz
tar -xvf opensearch-2.4.0-linux-x64.tar.gz
echo 'export OPENSEARCH_HOME=/home/ubuntu/opensearch-2.4.0' >> ~/.bashrc
source ~/.bashrc
$ $OPENSEARCH_HOME/bin/opensearch -version
sudo dd if=/dev/zero of=/swapfile bs=128M count=16
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
WARNING: A terminally deprecated method in java.lang.System has been called
WARNING: System::setSecurityManager has been called by org.opensearch.bootstrap.OpenSearch (file:/home/ubuntu/opensearch-2.4.0/lib/opensearch-2.4.0.jar)
WARNING: Please consider reporting this to the maintainers of org.opensearch.bootstrap.OpenSearch
WARNING: System::setSecurityManager will be removed in a future release
Version: 2.4.0, Build: tar/744ca260b892d119be8164f48d92b8810bd7801c/2022-11-15T04:42:29.671309257Z, JVM: 17.0.5
sudo swapoff -a
$ vi $OPENSEARCH_HOME/config/opensearch.yml
network.host: 0.0.0.0
discovery.type: single-node # single node로 설정
$ vi $OPENSEARCH_HOME/config/jvm.options
-Xms128m
-Xmx128m
bin/opensearch-plugin list # 설치된 plugin 확인
bin/opensearch-plugin remove opensearch-security # 인증이 오래 걸리기 때문에 security를 삭제
bin/opensearch
opensearch를 실행한 후 로컬에서 public IP로 접근
curl -X GET http://$IP:9200
$ sudo vi /etc/systemd/system/opensearch.service
[Unit]
Description=OpenSearch
Wants=network-online.target
After=network-online.target
[Service]
Type=forking
RuntimeDirectory=data
WorkingDirectory=/home/ubuntu/opensearch-2.4.0
ExecStart=/home/ubuntu/opensearch-2.4.0/bin/opensearch -d
User=ubuntu
Group=ubuntu
StandardOutput=journal
StandardError=inherit
LimitNOFILE=65535
LimitNPROC=4096
LimitAS=infinity
LimitFSIZE=infinity
TimeoutStopSec=0
KillSignal=SIGTERM
KillMode=process
SendSIGKILL=no
SuccessExitStatus=143
TimeoutStartSec=75
[Install]
WantedBy=multi-user.target
위 내용으로 systemd 서비스(unit) 파일을 생성
sudo systemctl daemon-reload
sudo systemctl enable opensearch.service
sudo systemctl start opensearch.service
ps -ef | grep opensearch
tail -f $OPENSEARCH_HOME/logs/opensearch.log # log 확인
export OPENSEARCH_REST_API=http://$IP:9200
Opensearch는 데이터를 저장하고, 인덱싱하고, 쿼리 기반으로 데이터를 찾는다는 점에서 데이터베이스와 유사
Opensearch에서 데이터를 검색하려면 데이터를 인덱싱해야 함
인덱싱
인덱싱의 결과로 생성된 것을 인덱스라고 칭함
도큐먼트가 저장되는 공간
형식
- 모든 문자는 소문자
// movie 인덱스를 생성
$ curl -XPUT $OPENSEARCH_REST_API/movie
// {"acknowledged":true,"shards_acknowledged":true,"index":"movie"}%
$ curl --head $OPENSEARCH_REST_API/movie
//HTTP/1.1 200 OK # 없는 경우 404를 반환
//content-type: application/json; charset=UTF-8
//content-length: 231
// 인덱스 조회
$ curl -XGET "$OPENSEARCH_REST_API/movie?pretty=true"
$ curl -XPUT $OPENSEARCH_REST_API/movie/_mapping \
-H "Content-Type: application/json" \
-d '
{
"properties": {
"title": {
"type": "text"
}
}
}
'
// {"acknowledged":true}%
// 텍스트가 의미를 갖는 경우 text가 아닌 keyword 사용
$ curl -XPUT $OPENSEARCH_REST_API/movie/_mapping \
-H "Content-Type: application/json" \
-d '
{
"properties": {
"genre": {
"type": "keyword"
}
}
}
'
// {"acknowledged":true}%
$ curl -XGET "$OPENSEARCH_REST_API/movie/_mapping?pretty=true"
genre정보와 title 정보가 추가된 것을 확인할 수 있음
$ curl -XPOST "$OPENSEARCH_REST_API/movie/_doc?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"title": "Love Actually",
"genre": "Drama"
}
'
$ curl -XPOST "$OPENSEARCH_REST_API/movie/_doc?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"title": "Interstellar"
}
'
curl -XGET "$OPENSEARCH_REST_API/movie/_doc/m---C4sBQacXKI7m-IsF"
$ curl -XPOST "$OPENSEARCH_REST_API/movie/_doc?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"title": "Love Actually",
"genre": "Drama",
"director": "Joseph Kosinski"
}
'
$ curl -XGET "$OPENSEARCH_REST_API/movie?pretty=true"
// rate는 text로 인식됨
$ curl -XPOST "$OPENSEARCH_REST_API/movie/_doc?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"title": "Titanic",
"rate": "7.9"
}
'
rate 필드는 text로 매핑되어 있으므로, 숫자로 값을 넣어도 문자열로 변환(tostring)하여 인덱싱함
→ 단, _source에는 원본이 그대로 보관되므로 값을 조회하면 입력한 숫자 값이 그대로 반환됨
curl -XPOST "$OPENSEARCH_REST_API/movie/_doc?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"title": "Titanic2",
"rate": 7.9
}
'
$ curl -XPOST "$OPENSEARCH_REST_API/movies/_doc?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"title": "Titanic",
"rate2": 7.9
}
'
rate2는 float으로 자동으로 설정됨
$ curl -XPOST "$OPENSEARCH_REST_API/movies/_doc?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"title": "Titanic",
"rate2": "This is text"
}
'
"This is text"는 float으로 형변환할 수 없기 때문에 에러가 발생함
$ curl -XGET "$OPENSEARCH_REST_API/movie/_doc/$id?pretty=true"
$ curl -XGET "$OPENSEARCH_REST_API/movie/_search?pretty=true"
$ curl -XGET "$OPENSEARCH_REST_API/movie/_search?pretty=true&q=title:Titanic"
$ curl -XPUT "$OPENSEARCH_REST_API/movie/_doc/$id?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"director": "Richard Curtis"
}
'
$ curl -XPOST "$OPENSEARCH_REST_API/movie/_update/$id?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"doc": {
"rate2": "8.0",
"genre": "Drama",
"rank": 1
}
}
'
$ curl -XDELETE "$OPENSEARCH_REST_API/movie/_doc/$id?pretty=true"
$ curl -XPOST "$OPENSEARCH_REST_API/movie/_close?pretty=true"
$ curl -XGET "$OPENSEARCH_REST_API/movie/_search?pretty=true"
인덱스에 접근하려 하면 인덱스가 닫혀있는 것을 확인할 수 있음
$ curl -XPOST "$OPENSEARCH_REST_API/movie/_open?pretty=true"
$ curl -XDELETE "$OPENSEARCH_REST_API/movie"
$ curl -XPOST "$OPENSEARCH_REST_API/_bulk?pretty=true" \
-H "Content-Type: application/json" \
-d '
{"index": {"_index": "test", "_id": "1"} }
{"field1": "value1"}
{"delete": {"_index": "test", "_id": "2"} }
{"create": {"_index": "test", "_id": "3"} }
{"field1": "value3"}
{"update": {"_id": "1", "_index": "test"} }
{"doc": {"field2": "value2"} }
'
텍스트 분석기는 3가지 요소로 구성
캐릭터 필터 → 토크나이저 → 토큰 필터
$ curl -XGET "$OPENSEARCH_REST_API/_analyze?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"analyzer": "standard",
"text": "Hello, World!"
}
'
$ curl -XGET "$OPENSEARCH_REST_API/_analyze?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"analyzer": "standard",
"text": ["first array element!", "second array element 2"]
}
'
다음과 같이 사용자 지정 analyzer를 설정할 수 있음
$ curl -XGET "$OPENSEARCH_REST_API/_analyze?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"tokenizer": "standard",
"filter": ["uppercase"],
"text": "Opensearch Custom Analyzer"
}
'
$ curl -XGET "$OPENSEARCH_REST_API/_analyze?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"tokenizer": "standard",
"filter": ["lowercase"],
"char_filter": ["html_strip"],
"text": "<b>Hello</b> world"
}
'
$ curl -XPUT "$OPENSEARCH_REST_API/book?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"mappings": {
"properties": {
"description": {
"type": "text",
"analyzer": "whitespace"
}
}
}
}
'
$ curl -XGET "$OPENSEARCH_REST_API/book/_analyze?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"field": "description",
"text": "Opensearch analyze test!!"
}
'
$ curl -XPUT "$OPENSEARCH_REST_API/music" \
-H "Content-Type: application/json" \
-d '
{
"settings": {
"analysis": {
"analyzer": {
"default": {
"type": "whitespace"
}
}
}
}
}
'
$ curl -XGET "$OPENSEARCH_REST_API/music/_analyze?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"text": "Opensearch analyze test!!"
}
'
$ curl -XPUT "$OPENSEARCH_REST_API/food?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"mappings": {
"properties": {
"category": {
"type": "keyword"
},
"review": {
"type": "text"
}
}
}
}
'
$ curl -XPOST "$OPENSEARCH_REST_API/_bulk?pretty=true" \
-H "Content-Type: application/json" \
-d '
{"index": {"_index": "food", "_id": "1"}}
{"category": "Fruit", "review": "Fruits are the means by which flowering plants disseminate their seeds."}
{"index": {"_index": "food", "_id": "2"}}
{"category": "Meat", "review": "Meat is animal flesh that is eaten as food."}
{"index": {"_index": "food", "_id": "3"}}
{"category": "Vegetable", "review": "Vegetables are parts of plants that are consumed by humans or other animals as food."}
{"index": {"_index": "food", "_id": "4"}}
{"category": "Bread", "review": "Bread is a staple food prepared from a dough of flour and water, usually by baking."}
{"index": {"_index": "food", "_id": "5"}}
{"category": "Fish", "review": "Fish are aquatic, craniate, gill-bearing animals that lack limbs with digits."}
'
전문 검색 쿼리를 사용하는 경우 검색어가 여러 도큐먼트에 포함된다면 연관성이 높은 순서대로 정렬해서 출력
$ curl -XGET "$OPENSEARCH_REST_API/food/_search?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"query": {
"match": {
"review": "flour, water"
}
}
}
'
curl -XGET "$OPENSEARCH_REST_API/food/_search?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"query": {
"match": {
"category": {"query": "Bread",
"analyzer": "standard"}
}
}
}
'
curl -XGET "$OPENSEARCH_REST_API/food/_search?pretty=true" \
-H "Content-Type: application/json" \
-d '
{
"query": {
"term": {
"review": "Bread is a staple food prepared from a dough of flour and water, usually by baking."
}
}
}
'
대단하시네요 ^__^