% Yu, Rahimi, Huang, Allan (2020): "Learning to Rank Entities for Set Expansion
% from Unstructured Data", ICTIR '20 proceedings paper.
% Notes for maintainers:
%   - `address` is the publisher's city; `location` is the conference venue.
%   - `pages` uses the BibTeX range form (`--`), not a Unicode en dash.
%   - `url` is the DOI resolver link for the same `doi`.
@inproceedings{10.1145/3409256.3409811,
  author    = {Yu, Puxuan and Rahimi, Razieh and Huang, Zhiqi and Allan, James},
  title     = {Learning to Rank Entities for Set Expansion from Unstructured Data},
  booktitle = {Proceedings of the 2020 {ACM} {SIGIR} on International Conference on Theory of Information Retrieval},
  series    = {ICTIR '20},
  year      = {2020},
  pages     = {21--28},
  numpages  = {8},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Virtual Event, Norway},
  isbn      = {9781450380676},
  doi       = {10.1145/3409256.3409811},
  url       = {https://doi.org/10.1145/3409256.3409811},
  keywords  = {neural networks, set completion, query by example},
  abstract  = {We propose using learning-to-rank for entity set expansion (ESE) from unstructured data, the task of finding ``sibling'' entities within a corpus that are from the set characterized by a small set of seed entities. We present a two-channel neural re-ranking model, NESE, that jointly learns exact and semantic matching of entity contexts through entity interaction features. Although entity set expansion has drawn increasing attention in the IR and NLP communities for its various applications, the lack of massive annotated entity sets has hindered the development of neural approaches. We describe DBpedia-Sets, a toolkit that automatically extracts entity sets from a plain text collection, thus providing a large amount of distant supervision data for neural model training. Experiments on real datasets of different scales from different domains show that NESE outperforms state-of-the-art approaches in terms of precision and MAP. Furthermore, evaluation through human annotations shows that the knowledge learned from the training data is generalizable.},
}