文档存储

class llama_index.storage.docstore.BaseDocumentStore
abstract delete_document(doc_id: str, raise_error: bool = True) None

Delete a document from the store.

abstract delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None

Delete a ref_doc and all its associated nodes.

abstract get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_node(node_id: str, raise_error: bool = True) Node

Get node from docstore.

参数
  • node_id (str) -- node id

  • raise_error (bool) -- raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, Node]

Get node dict from docstore given a mapping of index to node ids.

参数

node_id_dict (Dict[int, str]) -- mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[Node]

Get nodes from docstore.

参数
  • node_ids (List[str]) -- node ids

  • raise_error (bool) -- raise error if node_id not found

abstract get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None

Persist the docstore to a file.

llama_index.storage.docstore.DocumentStore

SimpleDocumentStore 的别名

class llama_index.storage.docstore.KVDocumentStore(kvstore: BaseKVStore, namespace: Optional[str] = None)

Document (Node) store.

NOTE: at the moment, this store is primarily used to store Node objects. Each node will be assigned an ID.

The same docstore can be reused across index structures. This allows you to reuse the same storage for multiple index structures; otherwise, each index would create a docstore under the hood.

This will use the same docstore for multiple index structures.

参数
  • kvstore (BaseKVStore) -- key-value store

  • namespace (str) -- namespace for the docstore

add_documents(docs: Sequence[BaseDocument], allow_update: bool = True) None

Add documents to the store.

参数
  • docs (Sequence[BaseDocument]) -- documents

  • allow_update (bool) -- allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseDocument]

Get all documents.

返回

documents

返回类型

Dict[str, BaseDocument]

document_exists(doc_id: str) bool

Check if document exists.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseDocument]

Get a document from the store.

参数
  • doc_id (str) -- document id

  • raise_error (bool) -- raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) Node

Get node from docstore.

参数
  • node_id (str) -- node id

  • raise_error (bool) -- raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, Node]

Get node dict from docstore given a mapping of index to node ids.

参数

node_id_dict (Dict[int, str]) -- mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[Node]

Get nodes from docstore.

参数
  • node_ids (List[str]) -- node ids

  • raise_error (bool) -- raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None

Persist the docstore to a file.

ref_doc_exists(ref_doc_id: str) bool

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None

Set the hash for a given doc_id.

class llama_index.storage.docstore.MongoDocumentStore(mongo_kvstore: MongoDBKVStore, namespace: Optional[str] = None)

Mongo Document (Node) store.

A MongoDB store for Document and Node objects.

参数
  • mongo_kvstore (MongoDBKVStore) -- MongoDB key-value store

  • namespace (str) -- namespace for the docstore

add_documents(docs: Sequence[BaseDocument], allow_update: bool = True) None

Add documents to the store.

参数
  • docs (Sequence[BaseDocument]) -- documents

  • allow_update (bool) -- allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseDocument]

Get all documents.

返回

documents

返回类型

Dict[str, BaseDocument]

document_exists(doc_id: str) bool

Check if document exists.

classmethod from_host_and_port(host: str, port: int, db_name: Optional[str] = None, namespace: Optional[str] = None) MongoDocumentStore

Load a MongoDocumentStore from a MongoDB host and port.

classmethod from_uri(uri: str, db_name: Optional[str] = None, namespace: Optional[str] = None) MongoDocumentStore

Load a MongoDocumentStore from a MongoDB URI.

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseDocument]

Get a document from the store.

参数
  • doc_id (str) -- document id

  • raise_error (bool) -- raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) Node

Get node from docstore.

参数
  • node_id (str) -- node id

  • raise_error (bool) -- raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, Node]

Get node dict from docstore given a mapping of index to node ids.

参数

node_id_dict (Dict[int, str]) -- mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[Node]

Get nodes from docstore.

参数
  • node_ids (List[str]) -- node ids

  • raise_error (bool) -- raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None

Persist the docstore to a file.

ref_doc_exists(ref_doc_id: str) bool

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None

Set the hash for a given doc_id.

class llama_index.storage.docstore.SimpleDocumentStore(simple_kvstore: Optional[SimpleKVStore] = None, namespace: Optional[str] = None)

Simple Document (Node) store.

An in-memory store for Document and Node objects.

参数
  • simple_kvstore (SimpleKVStore) -- simple key-value store

  • namespace (str) -- namespace for the docstore

add_documents(docs: Sequence[BaseDocument], allow_update: bool = True) None

Add documents to the store.

参数
  • docs (Sequence[BaseDocument]) -- documents

  • allow_update (bool) -- allow update of docstore from document

delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None

Delete a document from the store.

delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None

Delete a ref_doc and all its associated nodes.

property docs: Dict[str, BaseDocument]

Get all documents.

返回

documents

返回类型

Dict[str, BaseDocument]

document_exists(doc_id: str) bool

Check if document exists.

classmethod from_persist_dir(persist_dir: str = './storage', namespace: Optional[str] = None, fs: Optional[AbstractFileSystem] = None) SimpleDocumentStore

Create a SimpleDocumentStore from a persist directory.

参数
  • persist_dir (str) -- directory the store was persisted to (loaded from here)

  • namespace (Optional[str]) -- namespace for the docstore

  • fs (Optional[fsspec.AbstractFileSystem]) -- filesystem to use

classmethod from_persist_path(persist_path: str, namespace: Optional[str] = None, fs: Optional[AbstractFileSystem] = None) SimpleDocumentStore

Create a SimpleDocumentStore from a persist path.

参数
  • persist_path (str) -- path the store was persisted to (loaded from here)

  • namespace (Optional[str]) -- namespace for the docstore

  • fs (Optional[fsspec.AbstractFileSystem]) -- filesystem to use

get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]]

Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.

get_document(doc_id: str, raise_error: bool = True) Optional[BaseDocument]

Get a document from the store.

参数
  • doc_id (str) -- document id

  • raise_error (bool) -- raise error if doc_id not found

get_document_hash(doc_id: str) Optional[str]

Get the stored hash for a document, if it exists.

get_node(node_id: str, raise_error: bool = True) Node

Get node from docstore.

参数
  • node_id (str) -- node id

  • raise_error (bool) -- raise error if node_id not found

get_node_dict(node_id_dict: Dict[int, str]) Dict[int, Node]

Get node dict from docstore given a mapping of index to node ids.

参数

node_id_dict (Dict[int, str]) -- mapping of index to node ids

get_nodes(node_ids: List[str], raise_error: bool = True) List[Node]

Get nodes from docstore.

参数
  • node_ids (List[str]) -- node ids

  • raise_error (bool) -- raise error if node_id not found

get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo]

Get the RefDocInfo for a given ref_doc_id.

persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None

Persist the store.

ref_doc_exists(ref_doc_id: str) bool

Check if a ref_doc_id has been ingested.

set_document_hash(doc_id: str, doc_hash: str) None

Set the hash for a given doc_id.