mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-27 22:37:46 +00:00
core: fix CommaSeparatedListOutputParser to handle columns that contain commas (#26365)
- **Description:** Currently, CommaSeparatedListOutputParser can't handle strings that contain commas within a column; it treats every comma as a delimiter. For example, given `"foo, foo2", "bar", "baz"` it creates 4 columns: "foo", "foo2", "bar", "baz". It should create 3 columns: "foo, foo2", "bar", "baz".
- **Dependencies:** Added 2 additional imports, both from the Python standard library: `import csv` and `from io import StringIO`.
- **Twitter handle:** @jkyamog
- [ ] **Add tests and docs**: 1. Added a simple unit test, `test_multiple_items_with_comma`.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import re
|
||||
from abc import abstractmethod
|
||||
from collections import deque
|
||||
from collections.abc import AsyncIterator, Iterator
|
||||
from io import StringIO
|
||||
from typing import Optional as Optional
|
||||
from typing import TypeVar, Union
|
||||
|
||||
@@ -162,7 +164,14 @@ class CommaSeparatedListOutputParser(ListOutputParser):
|
||||
Returns:
|
||||
A list of strings.
|
||||
"""
|
||||
return [part.strip() for part in text.split(",")]
|
||||
try:
|
||||
reader = csv.reader(
|
||||
StringIO(text), quotechar='"', delimiter=",", skipinitialspace=True
|
||||
)
|
||||
return [item for sublist in reader for item in sublist]
|
||||
except csv.Error:
|
||||
# keep old logic for backup
|
||||
return [part.strip() for part in text.split(",")]
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
|
Reference in New Issue
Block a user