@inproceedings{10.1145/3459637.3482452,
  author    = {Huang, Zhiqi and Bonab, Hamed and Sarwar, Sheikh Muhammad and Rahimi, Razieh and Allan, James},
  title     = {Mixed Attention Transformer for Leveraging Word-Level Knowledge to Neural Cross-Lingual Information Retrieval},
  year      = {2021},
  isbn      = {9781450384469},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3459637.3482452},
  doi       = {10.1145/3459637.3482452},
  abstract  = {Pre-trained contextualized representations offer great success for many downstream tasks, including document ranking. The multilingual versions of such pre-trained representations provide a possibility of jointly learning many languages with the same model. Although large gains are expected from such joint training, in the case of cross-lingual information retrieval (CLIR), models under a multilingual setting do not achieve the same level of performance as those under a monolingual setting. We hypothesize that the performance drop is due to the translation gap between query and documents. In the monolingual retrieval task, because of the same lexical inputs, it is easier for the model to identify the query terms that occur in documents. However, in multilingual pre-trained models, where words in different languages are projected into the same hyperspace, the model tends to "translate" query terms into related terms - i.e., terms that appear in a similar context - in addition to, or sometimes rather than, synonyms in the target language. This property makes it difficult for the model to connect terms that co-occur in both query and document. To address this issue, we propose a novel Mixed Attention Transformer (MAT) that incorporates external word-level knowledge, such as a dictionary or translation table. We design a sandwich-like architecture to embed MAT into recent transformer-based deep neural models. By encoding the translation knowledge into an attention matrix, the model with MAT is able to focus on mutually translated words in the input sequence. Experimental results demonstrate the effectiveness of the external knowledge and the significant improvement of the MAT-embedded neural reranking model on the CLIR task.},
  booktitle = {Proceedings of the 30th ACM International Conference on Information \& Knowledge Management},
  pages     = {760--770},
  numpages  = {11},
  keywords  = {neural network, attention mechanism, cross-lingual information retrieval},
  location  = {Virtual Event, Queensland, Australia},
  series    = {CIKM '21}
}