From 4277867103fc67328e2033176de4387b85e9960f Mon Sep 17 00:00:00 2001
From: Xiao <Shitao@users.noreply.huggingface.co>
Date: Thu, 1 Feb 2024 16:39:36 +0000
Subject: [PATCH] Update README.md

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 28d41dd..20c64bd 100644
--- a/README.md
+++ b/README.md
@@ -209,6 +209,13 @@ print(model.compute_score(sentence_pairs,
 - Long Document Retrieval
   - MLDR:   
   ![avatar](./imgs/long.jpg)
+  Please note that MLDR is a document retrieval dataset that we constructed using an LLM.
+  It covers 13 languages and includes test, validation, and training sets.
+  We utilized the training set from MLDR to enhance the model's long document retrieval capabilities. 
+  Therefore, it is fairer to compare the baselines against `Dense w.o.long` (fine-tuning without the long document dataset).
+  Additionally, this long document retrieval dataset will be open-sourced to address the current lack of open-source multilingual long text retrieval datasets.
+  We believe that this data will be helpful for the open-source community in training document retrieval models.
+
   - NarritiveQA:  
   ![avatar](./imgs/nqa.jpg)