Skip to content

hext

This is an rdflib plugin for parsing Hextuples files, which are Newline-Delimited JSON (ndjson) files, into a context-aware graph such as a ConjunctiveGraph or Dataset. The store that backs the graph must be able to handle contexts, i.e. multiple graphs.

Classes:

- HextuplesParser – An RDFLib parser for Hextuples

__all__ module-attribute

__all__ = ['HextuplesParser']

HextuplesParser

HextuplesParser()

Bases: Parser

An RDFLib parser for Hextuples

Methods:

- parse

Attributes:

- default_context
- skolemize

Source code in rdflib/plugins/parsers/hext.py
def __init__(self):
    """Create a parser with no default context and skolemization switched off."""
    super(HextuplesParser, self).__init__()
    # Graph used as the default context; stays None until parse() selects one.
    self.default_context: Optional[Graph] = None
    # Overwritten by parse() with the value of its ``skolemize`` argument.
    self.skolemize = False

default_context instance-attribute

default_context: Optional[Graph] = None

skolemize instance-attribute

skolemize = False

parse

parse(source: InputSource, graph: Graph, skolemize: bool = False, **kwargs: Any) -> None
Source code in rdflib/plugins/parsers/hext.py
def parse(self, source: InputSource, graph: Graph, skolemize: bool = False, **kwargs: Any) -> None:  # type: ignore[override]
    """Read Hextuples (one JSON array per line) from ``source`` into ``graph``'s store.

    Args:
        source: Input to read from. Its ``getCharacterStream()`` and
            ``getByteStream()`` methods are both tried; at least one of them
            must return a stream.
        graph: Target graph. Its store must be context-aware.
        skolemize: Stored on ``self.skolemize`` before any line is handled.
        **kwargs: Only ``encoding`` is inspected. Any value other than
            ``None`` or ``"utf-8"`` triggers a warning and is otherwise
            ignored.

    Raises:
        AssertionError: If ``graph.store`` is not context-aware.
        ValueError: If ``source`` offers neither a character stream nor a
            byte stream.
    """
    requested_encoding = kwargs.get("encoding")
    if requested_encoding not in (None, "utf-8"):
        warnings.warn(
            f"Hextuples files are always utf-8 encoded, "
            f"I was passed: {requested_encoding}, "
            "but I'm still going to use utf-8"
        )

    assert graph.store.context_aware, "Hextuples Parser needs a context-aware store!"

    self.skolemize = skolemize

    # default_union=True mimics ConjunctiveGraph behaviour.
    dataset = Dataset(store=graph.store, default_union=True)
    stock_default = dataset.default_context  # the DEFAULT_DATASET_GRAPH_ID

    # Decide which graph acts as the default context for this parse run.
    if isinstance(graph, (Dataset, ConjunctiveGraph)):
        chosen_default = graph.default_context
    elif graph.identifier is None:
        # mypy thinks this is unreachable, but graph.identifier can be None
        chosen_default = stock_default  # type: ignore[unreachable]
    elif graph.identifier == stock_default.identifier:
        chosen_default = graph
    else:
        chosen_default = dataset.get_context(graph.identifier)
    self.default_context = chosen_default

    if chosen_default is not stock_default:
        # Swap in the chosen graph and drop the dataset's own, now unused,
        # default graph.
        dataset.default_context = chosen_default
        dataset.remove_graph(stock_default)

    # Probe the source for both stream flavours; a missing accessor or a
    # failed lookup simply means that flavour is unavailable.
    text_stream: Optional[TextIO]
    try:
        text_stream = source.getCharacterStream()
    except (AttributeError, LookupError):
        text_stream = None
    binary_stream: Optional[BinaryIO]
    try:
        binary_stream = source.getByteStream()
    except (AttributeError, LookupError):
        binary_stream = None

    if text_stream is None and binary_stream is None:
        raise ValueError(
            f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
        )
    if TYPE_CHECKING:
        assert text_stream is not None or binary_stream is not None

    use_stream: Union[TextIO, BinaryIO]
    if _HAS_ORJSON:
        # With orjson the byte stream is preferred; text is the fallback.
        loads = orjson.loads
        if binary_stream is None:
            if TYPE_CHECKING:
                assert isinstance(text_stream, TextIOWrapper)
            use_stream = text_stream
        else:
            use_stream = binary_stream
    else:
        # With the stdlib json module the text stream is preferred; a
        # byte-only source is wrapped in a utf-8 text decoder.
        loads = json.loads
        if text_stream is None:
            if TYPE_CHECKING:
                assert isinstance(binary_stream, BufferedReader)
            use_stream = TextIOWrapper(binary_stream, encoding="utf-8")
        else:
            use_stream = text_stream

    for line in use_stream:  # type: Union[str, bytes]
        if not line or line.isspace():
            # Blank lines are skipped because that is what was done before
            # for the first and last lines, albeit in a rather indirect way.
            # The result is that we accept input that would otherwise be
            # invalid. Possibly we should just let this result in an error.
            continue
        # This complex handling is because the 'value' component (index 2)
        # is allowed to be "" but not None; every other "" is treated as
        # None.
        fields: List[str] = loads(line)
        hextuple = [None if item == "" else item for item in fields]
        if fields[2] == "":
            hextuple[2] = ""
        self._parse_hextuple(dataset, hextuple)