Elasticsearch 사용법

Han Hanju·2021년 6월 28일
0
post-thumbnail

1. jupyter에서 사용하기

  • elasticsearch 설치
 pip install elasticsearch
  • 연동
from elasticsearch import Elasticsearch
from elasticsearch import helpers

es = Elasticsearch("http://your_address.com:9200") 
es.info()

# 결과
{'name': 'airnd_datalake_001',
 'cluster_name': 'airnd_datalake_group',
 'cluster_uuid': 'k76pNCySRamAWDiZfEaPlA',
 'version': {'number': '7.4.2',
  'build_flavor': 'default',
  'build_type': 'tar',
  'build_hash': '2f90bbf7b93631e52bafb59b3b049cb44ec25e96',
  'build_date': '2019-10-28T20:40:44.881551Z',
  'build_snapshot': False,
  'lucene_version': '8.2.0',
  'minimum_wire_compatibility_version': '6.8.0',
  'minimum_index_compatibility_version': '6.0.0-beta1'},
 'tagline': 'You Know, for Search'}
  • 전체 count 확인
es.count(index='datalake_market_category_matching')

# 결과
{'count': 213280,
 '_shards': {'total': 3, 'successful': 3, 'skipped': 0, 'failed': 0}}
  • 전체 query 확인
data = {"match_all":{}}
body = {'from':0, 'size':2,"query":data}
results = es.search(index='datalake_market_category_matching', body=body)
print(len(results['hits']['hits']))

for result in results['hits']['hits']:
    print(result)
    
    
# 결과
2
{'_index': 'datalake_market_category_matching', '_type': 'market_category_matching', '_id': 'p8r1WnMBnb2QtO2ALFlZ', '_score': 1.0, '_source': {'market_category_name': '패션의류 > 여성의류 > 티셔츠', 'official_text': '의류', 'market_category_code': '50000803', 'market_product_code': '4934571687', 'market_code': 'shopn', 'product_code': 'P00000MV', 'shop_no': 1, 'market_product_name': '구매금지상품 필수다있음2', 'mall_id': 'ectopsu1327', 'product_no': 333, 'official_code': '1', 'timestamp': '2020-05-20T11:23:36+09:00', 'api_key': 'fc4bf0f8-f36f-5a61-a2b8-935fcea07e48'}}
{'_index': 'datalake_market_category_matching', '_type': 'market_category_matching', '_id': 'qcr1WnMBnb2QtO2ALFlZ', '_score': 1.0, '_source': {'market_category_name': '여성의류 > 트레이닝복 > 긴팔티셔츠', 'official_text': '의류', 'market_category_code': '1006648', 'market_product_code': '2859134819', 'market_code': 'sk11st', 'product_code': 'P00000MO', 'shop_no': 1, 'market_product_name': '구매금지상품 유효성통과1', 'mall_id': 'ectopsu1327', 'product_no': 326, 'official_code': '1', 'timestamp': '2020-05-20T11:23:43+09:00', 'api_key': 'fc4bf0f8-f36f-5a61-a2b8-935fcea07e48'}}
  • 특정 query 확인
data = {"match":{"official_text":"의류"}}
body = {'from':0, 'size':2,"query":data}
results = es.search(index='datalake_market_category_matching', body=body)
print(len(results['hits']['hits']))

for result in results['hits']['hits']:
    print(result)
    
    
# 결과
2
{'_index': 'datalake_market_category_matching', '_type': 'market_category_matching', '_id': 'MsorW3MBnb2QtO2Ai3lJ', '_score': 0.1342536, '_source': {'market_product_name': 'M cafe24 TEST hiang 0717 04 edibot', 'product_code': 'P00000ET', 'market_category_name': '브랜드 여성의류 > 트레이닝복 > 트레이닝 상의', 'market_category_code': '001201013001', 'official_code': '1', 'official_text': '의류', 'mall_id': 'ectqued1679', 'shop_no': 1, 'market_code': 'inpark', 'market_product_code': '7341169846', 'category_search_keyword': '', 'product_no': 123, 'timestamp': '2020-07-17T14:07:27+09:00', 'api_key': 'fc4bf0f8-f36f-5a61-a2b8-935fcea07e48'}}
{'_index': 'datalake_market_category_matching', '_type': 'market_category_matching', '_id': 'XMotW3MBnb2QtO2ADnmw', '_score': 0.1342536, '_source': {'market_product_name': 'M cafe24 TEST hiang 0717 04 edibot', 'product_code': 'P00000ET', 'market_category_name': '여성 > 의류 > 팬츠 > 트레이닝팬츠', 'market_category_code': '0001000100060007', 'official_code': '1', 'official_text': '의류', 'mall_id': 'ectqued1679', 'shop_no': 1, 'market_code': 'brich', 'market_product_code': '', 'category_search_keyword': '트레이닝', 'product_no': 123, 'timestamp': '2020-07-17T14:09:06+09:00', 'api_key': 'fc4bf0f8-f36f-5a61-a2b8-935fcea07e48'}}

2. Kibana(Dev Tools 콘솔)에서 사용하기

  • index 확인
GET _cat/indices
  • count 확인
GET datalake_market_category_matching/_count
  • 전체 쿼리 조회
GET datalake_market_category_matching/_search
{
  "size":10000,
  "query": {
    "match_all": {}
  }
}
  • 특정 쿼리 조회
GET datalake_market_category_matching/_search
{
  "size":10000,
  "query": {
    "match": {"official_text": "의류"}
  }
}
  • 오름차순, 내림차순 조회 (아래 예시는 오름차순이며, 내림차순은 "order"를 "desc"로 지정)
GET datalake_market_category_matching/_search
{
  "from":1, "size":2,
  "query": {
    "match": {"shop_no":1}
  },
  "sort":[
    {
      "_id":{
        "order":"asc"
      }
    }
  ]
}
  • 여러 조건 동시 검색 (bool must 안에 match를 여러 개 사용 — multi_match 쿼리와는 다름)
GET datalake_market_category_matching/_search
{
  "query": {
    "bool":{
      "must":[
        {"match": {"product_no":83101}},
        {"match": {"mall_id":"cafe0224"}}
      ]
    }
  }
}
  • group by (terms aggregation)
GET datalake_market_category_matching/_search
{
  "size": 0,
  "aggs": {
    "group_by_mall_id": {
      "terms": {
        "field": "market_code.keyword",
        "size" : 5000
      }
    }
  }
}

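참고: 위 terms aggregation은 SQL의 GROUP BY에 해당한다. 예를 들어 (예시용 인덱스 bank의 state 필드를 기준으로 한다면) 다음 SQL과 같은 형태다.
SELECT state, COUNT(*) FROM bank GROUP BY state ORDER BY COUNT(*) DESC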

3. sql query를 이용한 검색법

  • SQL을 Elasticsearch 쿼리(DSL)로 변환 (_sql/translate는 검색을 실행하지 않고 변환 결과만 반환)
GET /_sql/translate
{
  "query": "SELECT mall_id, product_no, market_code,market_category_name  FROM datalake_market_category_matching where official_text='의류' and mall_id='baidbill92' and product_no=130"
}
  • translate로 변환된 쿼리를 Elasticsearch에서 실행 (아래 예시는 mall_id로 GROUP BY 하는 SQL을 변환한 결과로 보이며, 위 SELECT 문의 변환 결과와는 다름)
GET datalake_market_category_matching/_search
{
  "size" : 0,
  "_source" : false,
  "stored_fields" : "_none_",
  "aggregations" : {
    "groupby" : {
      "composite" : {
        "size" : 1000,
        "sources" : [
          {
            "538" : {
              "terms" : {
                "field" : "mall_id.keyword",
                "missing_bucket" : true,
                "order" : "asc"
              }
            }
          }
        ]
      }
    }
  }
}

Reference

https://soyoung-new-challenge.tistory.com/78
https://krksap.tistory.com/1749

profile
Data Analytics Engineer

0개의 댓글