From 325bdac6733cd5508697f2dedaa2cab3e043f1b3 Mon Sep 17 00:00:00 2001 From: Andrew Teeter Date: Sun, 19 Nov 2023 20:35:17 -0500 Subject: [PATCH] feat: load all namespaces (#13549) - **Description:** This change allows the `MWDumpLoader` to load all namespaces, including custom ones, by default instead of only loading the [default namespaces](https://www.mediawiki.org/wiki/Help:Namespaces#Localisation). - **Tag maintainer:** @hwchase17 --- libs/langchain/langchain/document_loaders/mediawikidump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/mediawikidump.py b/libs/langchain/langchain/document_loaders/mediawikidump.py index d4f7815cec2..83e0911ab5b 100644 --- a/libs/langchain/langchain/document_loaders/mediawikidump.py +++ b/libs/langchain/langchain/document_loaders/mediawikidump.py @@ -55,7 +55,7 @@ class MWDumpLoader(BaseLoader): self.file_path = file_path if isinstance(file_path, str) else str(file_path) self.encoding = encoding # Namespaces range from -2 to 15, inclusive. - self.namespaces = namespaces or list(range(-2, 16)) + self.namespaces = namespaces self.skip_redirects = skip_redirects self.stop_on_error = stop_on_error @@ -76,7 +76,7 @@ class MWDumpLoader(BaseLoader): for page in dump.pages: if self.skip_redirects and page.redirect: continue - if page.namespace not in self.namespaces: + if self.namespaces and page.namespace not in self.namespaces: continue try: for revision in page: