最近在开发联想词功能,但是我发现:搜索“词”时,“词xxx”和“xxx词”的得分相同。配置信息如下:
PUT /index_drug_associate/
{
"settings" : {
"analysis" : {
"analyzer" : {
"pinyin_full_analyzer" : {
"tokenizer" : "whitespace",
"filter" : ["my_pinyin_full", "word_delimiter"]
}
},
"filter" : {
"my_pinyin_full" : {
"type" : "pinyin",
"keep_first_letter":true,
"keep_separate_first_letter" : true,
"keep_full_pinyin" : true,
"keep_original" : false,
"limit_first_letter_length" : 16,
"lowercase" : true
}
}
},
"index" : {
"similarity" : {
"my_similarity" : {
"type" : "BM25",
"b" : "0.1",
"k1" : "0.3"
}
}
},
"number_of_replicas": 1,
"number_of_shards": 1
}
}
POST /index_drug_associate/_mapping/drugAssociate
{
"properties": {
"word": {
"type": "text",
"fields": {
"pinyin": {
"type": "text",
"store": false,
"term_vector": "with_positions_offsets",
"analyzer": "pinyin_full_analyzer",
"boost": 10,
"similarity": "my_similarity"
}
}
}
}
}
POST /_bulk
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "1s" } }
{ "word": "阿莫西林XXX" , "pv": 1}
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "2s" } }
{ "word": "阿莫西林", "pv": 2 }
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "3s" } }
{ "word": "XXX阿莫西林" , "pv": 3}
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "4s" } }
{ "word": "啊莫西林胶囊" , "pv": 4}
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "5s" } }
{ "word": "XXX阿莫西林XXXX", "pv": 5 }
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "6s" } }
{ "word": "XXX阿莫 西林XX" , "pv": 6 }
查询
GET /index_drug_associate/_search
{
"explain": false,
"query": {
"bool": {
"filter": {
"range": {
"pv": {
"gte": 0,
"lte": 20
}
}
},
"should": [
{
"match_phrase": {
"word.pinyin": {
"query": "al",
"slop": 5,
"boost": 2
}
}
}
]
}
}
}
结果
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 6,
"max_score": 3.9376342,
"hits": [
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "2s",
"_score": 3.9376342,
"_source": {
"word": "阿莫西林",
"pv": 2
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "4s",
"_score": 3.9053564,
"_source": {
"word": "啊莫西林胶囊",
"pv": 4
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "1s",
"_score": 3.8894148,
"_source": {
"word": "阿莫西林XXX",
"pv": 1
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "3s",
"_score": 3.8894148,
"_source": {
"word": "XXX阿莫西林",
"pv": 3
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "6s",
"_score": 3.8579192,
"_source": {
"word": "XXX阿莫 西林XX",
"pv": 6
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "5s",
"_score": 3.8269293,
"_source": {
"word": "XXX阿莫西林XXXX",
"pv": 5
}
}
]
}
}
预期结果:
阿莫西林XXX 的得分应高于 XXX阿莫西林
实际结果:
阿莫西林XXX 的得分与 XXX阿莫西林 的得分相同(均为 3.8894148)
PUT /index_drug_associate/
{
"settings" : {
"analysis" : {
"analyzer" : {
"pinyin_full_analyzer" : {
"tokenizer" : "whitespace",
"filter" : ["my_pinyin_full", "word_delimiter"]
}
},
"filter" : {
"my_pinyin_full" : {
"type" : "pinyin",
"keep_first_letter":true,
"keep_separate_first_letter" : true,
"keep_full_pinyin" : true,
"keep_original" : false,
"limit_first_letter_length" : 16,
"lowercase" : true
}
}
},
"index" : {
"similarity" : {
"my_similarity" : {
"type" : "BM25",
"b" : "0.1",
"k1" : "0.3"
}
}
},
"number_of_replicas": 1,
"number_of_shards": 1
}
}
POST /index_drug_associate/_mapping/drugAssociate
{
"properties": {
"word": {
"type": "text",
"fields": {
"pinyin": {
"type": "text",
"store": false,
"term_vector": "with_positions_offsets",
"analyzer": "pinyin_full_analyzer",
"boost": 10,
"similarity": "my_similarity"
}
}
}
}
}
POST /_bulk
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "1s" } }
{ "word": "阿莫西林XXX" , "pv": 1}
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "2s" } }
{ "word": "阿莫西林", "pv": 2 }
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "3s" } }
{ "word": "XXX阿莫西林" , "pv": 3}
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "4s" } }
{ "word": "啊莫西林胶囊" , "pv": 4}
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "5s" } }
{ "word": "XXX阿莫西林XXXX", "pv": 5 }
{ "create": { "_index": "index_drug_associate", "_type": "drugAssociate", "_id": "6s" } }
{ "word": "XXX阿莫 西林XX" , "pv": 6 }
查询
GET /index_drug_associate/_search
{
"explain": false,
"query": {
"bool": {
"filter": {
"range": {
"pv": {
"gte": 0,
"lte": 20
}
}
},
"should": [
{
"match_phrase": {
"word.pinyin": {
"query": "al",
"slop": 5,
"boost": 2
}
}
}
]
}
}
}
结果
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 6,
"max_score": 3.9376342,
"hits": [
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "2s",
"_score": 3.9376342,
"_source": {
"word": "阿莫西林",
"pv": 2
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "4s",
"_score": 3.9053564,
"_source": {
"word": "啊莫西林胶囊",
"pv": 4
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "1s",
"_score": 3.8894148,
"_source": {
"word": "阿莫西林XXX",
"pv": 1
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "3s",
"_score": 3.8894148,
"_source": {
"word": "XXX阿莫西林",
"pv": 3
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "6s",
"_score": 3.8579192,
"_source": {
"word": "XXX阿莫 西林XX",
"pv": 6
}
},
{
"_index": "index_drug_associate",
"_type": "drugAssociate",
"_id": "5s",
"_score": 3.8269293,
"_source": {
"word": "XXX阿莫西林XXXX",
"pv": 5
}
}
]
}
}
预期结果:
阿莫西林XXX 的得分应高于 XXX阿莫西林
实际结果:
阿莫西林XXX 的得分与 XXX阿莫西林 的得分相同(均为 3.8894148)
2 个回复
vergilyn
赞同来自:
https://www.elastic.co/guide/e ... .html
tacsklet - 公司有用到es
赞同来自:
你的需求应该是:查询词在字段中出现的位置越靠前,得分越高。那么可以通过 indexOf 拿到查询词在字段中出现的位置,再取倒数参与打分(位置从 0 开始,需要先加 1 再取倒数,避免除以 0)。或者你再想一下有没有更合适的方法。
https://www.elastic.co/guide/e ... .html