53 lines
2.1 KiB
Python
53 lines
2.1 KiB
Python
from typing import Any, Dict, Type
|
|
|
|
from llama_index.readers.base import BasePydanticReader
|
|
from llama_index.readers.discord_reader import DiscordReader
|
|
from llama_index.readers.elasticsearch import ElasticsearchReader
|
|
from llama_index.readers.google_readers.gdocs import GoogleDocsReader
|
|
from llama_index.readers.google_readers.gsheets import GoogleSheetsReader
|
|
from llama_index.readers.notion import NotionPageReader
|
|
from llama_index.readers.slack import SlackReader
|
|
from llama_index.readers.string_iterable import StringIterableReader
|
|
from llama_index.readers.twitter import TwitterTweetReader
|
|
from llama_index.readers.web import (
|
|
BeautifulSoupWebReader,
|
|
RssReader,
|
|
SimpleWebPageReader,
|
|
TrafilaturaWebReader,
|
|
)
|
|
from llama_index.readers.wikipedia import WikipediaReader
|
|
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
|
|
|
ALL_READERS: Dict[str, Type[BasePydanticReader]] = {
|
|
DiscordReader.class_name(): DiscordReader,
|
|
ElasticsearchReader.class_name(): ElasticsearchReader,
|
|
GoogleDocsReader.class_name(): GoogleDocsReader,
|
|
GoogleSheetsReader.class_name(): GoogleSheetsReader,
|
|
NotionPageReader.class_name(): NotionPageReader,
|
|
SlackReader.class_name(): SlackReader,
|
|
StringIterableReader.class_name(): StringIterableReader,
|
|
TwitterTweetReader.class_name(): TwitterTweetReader,
|
|
SimpleWebPageReader.class_name(): SimpleWebPageReader,
|
|
TrafilaturaWebReader.class_name(): TrafilaturaWebReader,
|
|
RssReader.class_name(): RssReader,
|
|
BeautifulSoupWebReader.class_name(): BeautifulSoupWebReader,
|
|
WikipediaReader.class_name(): WikipediaReader,
|
|
YoutubeTranscriptReader.class_name(): YoutubeTranscriptReader,
|
|
}
|
|
|
|
|
|
def load_reader(data: Dict[str, Any]) -> BasePydanticReader:
|
|
if isinstance(data, BasePydanticReader):
|
|
return data
|
|
class_name = data.get("class_name", None)
|
|
if class_name is None:
|
|
raise ValueError("Must specify `class_name` in reader data.")
|
|
|
|
if class_name not in ALL_READERS:
|
|
raise ValueError(f"Reader class name {class_name} not found.")
|
|
|
|
# remove static attribute
|
|
data.pop("is_remote", None)
|
|
|
|
return ALL_READERS[class_name].from_dict(data)
|