mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-25 12:44:04 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			37 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			37 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Test text splitting functionality using NLTK and Spacy based sentence splitters."""
 | |
| import pytest
 | |
| 
 | |
| from langchain.text_splitter import NLTKTextSplitter, SpacyTextSplitter
 | |
| 
 | |
| 
 | |
def test_nltk_text_splitting_args() -> None:
    """Constructing NLTKTextSplitter with chunk_size=2, chunk_overlap=4 must raise."""
    # The overlap (4) is larger than the chunk size (2); presumably that is
    # what makes this argument combination invalid -- confirm in the splitter.
    invalid_kwargs = {"chunk_size": 2, "chunk_overlap": 4}
    with pytest.raises(ValueError):
        NLTKTextSplitter(**invalid_kwargs)
 | |
| 
 | |
| 
 | |
def test_spacy_text_splitting_args() -> None:
    """Constructing SpacyTextSplitter with chunk_size=2, chunk_overlap=4 must raise."""
    # The overlap (4) is larger than the chunk size (2); presumably that is
    # what makes this argument combination invalid -- confirm in the splitter.
    invalid_kwargs = {"chunk_size": 2, "chunk_overlap": 4}
    with pytest.raises(ValueError):
        SpacyTextSplitter(**invalid_kwargs)
 | |
| 
 | |
| 
 | |
def test_nltk_text_splitter() -> None:
    """Check NLTKTextSplitter output for a two-sentence input.

    With a custom separator, the two sentences are expected to come back as
    a single chunk in which the separator sits between them.
    """
    separator = "|||"
    sample = "This is sentence one. And this is sentence two."

    chunks = NLTKTextSplitter(separator=separator).split_text(sample)

    assert chunks == [f"This is sentence one.{separator}And this is sentence two."]
 | |
| 
 | |
| 
 | |
def test_spacy_text_splitter() -> None:
    """Check SpacyTextSplitter output for a two-sentence input.

    With a custom separator, the two sentences are expected to come back as
    a single chunk in which the separator sits between them.
    """
    separator = "|||"
    sample = "This is sentence one. And this is sentence two."

    chunks = SpacyTextSplitter(separator=separator).split_text(sample)

    assert chunks == [f"This is sentence one.{separator}And this is sentence two."]
 |