langchain/libs/experimental/langchain_experimental/video_captioning/base.py
Eugene Yurtsev b9f65e5038
experimental[patch]: Migrate pydantic extra to literals (#25194)
Migrate pydantic extra to literals

Upgrade to using a literal for specifying `extra`, which is the recommended
approach in pydantic 2.

This also works correctly in pydantic v1:

```python
from pydantic.v1 import BaseModel

class Foo(BaseModel, extra="forbid"):
    x: int

Foo(x=5, y=1)
```

And the equivalent `Config` class form:

```python
from pydantic.v1 import BaseModel

class Foo(BaseModel):
    x: int

    class Config:
      extra = "forbid"

Foo(x=5, y=1)
```
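For reference, a minimal sketch of the same constraint in native pydantic v2 style (assumes pydantic >= 2; shown only for comparison, not part of the migration itself):

```python
from pydantic import BaseModel, ConfigDict


class Foo(BaseModel):
    model_config = ConfigDict(extra="forbid")

    x: int


Foo(x=5, y=1)  # raises ValidationError: extra fields are forbidden
```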


## Enum -> literal using grit pattern:

```
engine marzano(0.1)
language python
or {
    `extra=Extra.allow` => `extra="allow"`,
    `extra=Extra.forbid` => `extra="forbid"`,
    `extra=Extra.ignore` => `extra="ignore"`
}
```
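An illustrative before/after of what this rewrite does to a typical `Config` (a sketch; `Extra` is the pydantic v1 enum):

```python
from pydantic.v1 import BaseModel, Extra


# Before: enum-based extra
class FooBefore(BaseModel):
    x: int

    class Config:
        extra = Extra.allow


# After: literal-based extra
class FooAfter(BaseModel):
    x: int

    class Config:
        extra = "allow"
```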

Re-sorted the attributes in `Config` and removed the doc-string, in case we
need to go back and forth between pydantic v1 and v2 during the 0.3 release.
(This will reduce merge conflicts.)


## Sort attributes in Config:

```
engine marzano(0.1)
language python


function sort($values) js {
    return $values.text.split(',').sort().join("\n");
}


class_definition($name, $body) as $C where {
    $name <: `Config`,
    $body <: block($statements),
    $values = [],
    $statements <: some bubble($values) assignment() as $A where {
        $values += $A
    },
    $body => sort($values),
}

```
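For example, a `Config` like the one in the chain below would be rewritten roughly as follows (illustrative sketch):

```python
# Before sorting:
class Config:
    extra = "allow"
    arbitrary_types_allowed = True


# After sorting (assignment statements ordered alphabetically):
class Config:
    arbitrary_types_allowed = True
    extra = "allow"
```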
2024-08-08 19:05:54 +00:00


from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate

from langchain_experimental.video_captioning.services.audio_service import (
    AudioProcessor,
)
from langchain_experimental.video_captioning.services.caption_service import (
    CaptionProcessor,
)
from langchain_experimental.video_captioning.services.combine_service import (
    CombineProcessor,
)
from langchain_experimental.video_captioning.services.image_service import (
    ImageProcessor,
)
from langchain_experimental.video_captioning.services.srt_service import SRTProcessor


class VideoCaptioningChain(Chain):
    """
    Video Captioning Chain.
    """

    llm: BaseLanguageModel
    assemblyai_key: str
    prompt: Optional[PromptTemplate] = None
    verbose: bool = True
    use_logging: Optional[bool] = True
    frame_skip: int = -1
    image_delta_threshold: int = 3000000
    closed_caption_char_limit: int = 20
    closed_caption_similarity_threshold: int = 80
    use_unclustered_video_models: bool = False

    class Config:
        arbitrary_types_allowed = True
        extra = "allow"

    @property
    def input_keys(self) -> List[str]:
        return ["video_file_path"]

    @property
    def output_keys(self) -> List[str]:
        return ["srt"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        if "video_file_path" not in inputs:
            raise ValueError(
                "Missing 'video_file_path' in inputs for video captioning."
            )

        video_file_path = inputs["video_file_path"]
        nl = "\n"

        run_manager.on_text(
            "Loading processors..." + nl
        ) if self.use_logging and run_manager else None

        audio_processor = AudioProcessor(api_key=self.assemblyai_key)
        image_processor = ImageProcessor(
            frame_skip=self.frame_skip, threshold=self.image_delta_threshold
        )
        caption_processor = CaptionProcessor(
            llm=self.llm,
            verbose=self.verbose,
            similarity_threshold=self.closed_caption_similarity_threshold,
            use_unclustered_models=self.use_unclustered_video_models,
        )
        combine_processor = CombineProcessor(
            llm=self.llm,
            verbose=self.verbose,
            char_limit=self.closed_caption_char_limit,
        )
        srt_processor = SRTProcessor()

        run_manager.on_text(
            "Finished loading processors."
            + nl
            + "Generating subtitles from audio..."
            + nl
        ) if self.use_logging and run_manager else None

        # Get models for speech to text subtitles
        audio_models = audio_processor.process(video_file_path, run_manager)

        run_manager.on_text(
            "Finished generating subtitles:"
            + nl
            + f"{nl.join(str(obj) for obj in audio_models)}"
            + nl
            + "Generating closed captions from video..."
            + nl
        ) if self.use_logging and run_manager else None

        # Get models for image frame description
        image_models = image_processor.process(video_file_path, run_manager)

        run_manager.on_text(
            "Finished generating closed captions:"
            + nl
            + f"{nl.join(str(obj) for obj in image_models)}"
            + nl
            + "Refining closed captions..."
            + nl
        ) if self.use_logging and run_manager else None

        # Get models for video event closed-captions
        video_models = caption_processor.process(image_models, run_manager)

        run_manager.on_text(
            "Finished refining closed captions:"
            + nl
            + f"{nl.join(str(obj) for obj in video_models)}"
            + nl
            + "Combining subtitles with closed captions..."
            + nl
        ) if self.use_logging and run_manager else None

        # Combine the subtitle models with the closed-caption models
        caption_models = combine_processor.process(
            video_models, audio_models, run_manager
        )

        run_manager.on_text(
            "Finished combining subtitles with closed captions:"
            + nl
            + f"{nl.join(str(obj) for obj in caption_models)}"
            + nl
            + "Generating SRT file..."
            + nl
        ) if self.use_logging and run_manager else None

        # Convert the combined model to SRT format
        srt_content = srt_processor.process(caption_models)

        run_manager.on_text(
            "Finished generating srt file." + nl
        ) if self.use_logging and run_manager else None

        return {"srt": srt_content}

    @property
    def _chain_type(self) -> str:
        return "video_captioning_chain"
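A minimal usage sketch for this chain (hypothetical example; assumes a valid AssemblyAI key, the chain's video/audio processing dependencies, and an LLM of your choice — `ChatOpenAI` is only an illustrative pick):

```python
from langchain_openai import ChatOpenAI

from langchain_experimental.video_captioning.base import VideoCaptioningChain

# Any BaseLanguageModel works here; this model choice is illustrative.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

chain = VideoCaptioningChain(
    llm=llm,
    assemblyai_key="YOUR_ASSEMBLYAI_API_KEY",  # placeholder, not a real key
)

# Input key "video_file_path" -> output key "srt" (see input_keys / output_keys above).
result = chain.invoke({"video_file_path": "path/to/video.mp4"})

with open("captions.srt", "w") as f:
    f.write(result["srt"])
```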