langchain/libs/community/langchain_community/utilities/bibtex.py
Erick Friis c2a3021bb0
multiple: pydantic 2 compatibility, v0.3 (#26443)
Signed-off-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com>
Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com>
Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com>
Co-authored-by: ZhangShenao <15201440436@163.com>
Co-authored-by: Friso H. Kingma <fhkingma@gmail.com>
Co-authored-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Nuno Campos <nuno@langchain.dev>
Co-authored-by: Morgante Pell <morgantep@google.com>
2024-09-13 14:38:45 -07:00

89 lines
2.4 KiB
Python

"""Util that calls bibtexparser."""
import logging
from typing import Any, Dict, List, Mapping
from pydantic import BaseModel, ConfigDict, model_validator
logger = logging.getLogger(__name__)
OPTIONAL_FIELDS = [
"annotate",
"booktitle",
"editor",
"howpublished",
"journal",
"keywords",
"note",
"organization",
"publisher",
"school",
"series",
"type",
"doi",
"issn",
"isbn",
]
class BibtexparserWrapper(BaseModel):
"""Wrapper around bibtexparser.
To use, you should have the ``bibtexparser`` python package installed.
https://bibtexparser.readthedocs.io/en/master/
This wrapper will use bibtexparser to load a collection of references from
a bibtex file and fetch document summaries.
"""
model_config = ConfigDict(
extra="forbid",
)
@model_validator(mode="before")
@classmethod
def validate_environment(cls, values: Dict) -> Any:
"""Validate that the python package exists in environment."""
try:
import bibtexparser # noqa
except ImportError:
raise ImportError(
"Could not import bibtexparser python package. "
"Please install it with `pip install bibtexparser`."
)
return values
def load_bibtex_entries(self, path: str) -> List[Dict[str, Any]]:
"""Load bibtex entries from the bibtex file at the given path."""
import bibtexparser
with open(path) as file:
entries = bibtexparser.load(file).entries
return entries
def get_metadata(
self, entry: Mapping[str, Any], load_extra: bool = False
) -> Dict[str, Any]:
"""Get metadata for the given entry."""
publication = entry.get("journal") or entry.get("booktitle")
if "url" in entry:
url = entry["url"]
elif "doi" in entry:
url = f'https://doi.org/{entry["doi"]}'
else:
url = None
meta = {
"id": entry.get("ID"),
"published_year": entry.get("year"),
"title": entry.get("title"),
"publication": publication,
"authors": entry.get("author"),
"abstract": entry.get("abstract"),
"url": url,
}
if load_extra:
for field in OPTIONAL_FIELDS:
meta[field] = entry.get(field)
return {k: v for k, v in meta.items() if v is not None}