This is an rdflib plugin for parsing Hextuples files, which are Newline-Delimited JSON
(ndjson) files, into a conjunctive graph. The store that backs the graph must be able to
handle contexts, i.e. multiple graphs.
Classes:
__all__
module-attribute
__all__ = ['HextuplesParser']
HextuplesParser
Bases: Parser
An RDFLib parser for Hextuples
Methods:
Attributes:
Source code in rdflib/plugins/parsers/hext.py
def __init__(self):
    """Initialise the parser with no default context and skolemization disabled."""
    super().__init__()
    # Graph that receives hextuples with no explicit graph name; assigned in parse().
    self.default_context: Optional[Graph] = None
    # Whether blank nodes are replaced with skolem IRIs; assigned in parse().
    self.skolemize = False
default_context
instance-attribute
default_context: Optional[Graph] = None
skolemize
instance-attribute
skolemize = False
parse
parse(source: InputSource, graph: Graph, skolemize: bool = False, **kwargs: Any) -> None
Source code in rdflib/plugins/parsers/hext.py
def parse(self, source: InputSource, graph: Graph, skolemize: bool = False, **kwargs: Any) -> None:  # type: ignore[override]
    """Parse Hextuples (newline-delimited JSON) from ``source`` into ``graph``.

    Each input line is one JSON array of six elements. The store backing
    ``graph`` must be context-aware, because every hextuple names the graph
    it belongs to.

    Args:
        source: The input to read from; must expose a character stream or a
            byte stream.
        graph: Target graph; its store receives the parsed quads.
        skolemize: When True, blank nodes are replaced by skolem IRIs.
        **kwargs: Only ``encoding`` is inspected; anything other than
            ``utf-8`` (or unset) triggers a warning and is ignored.
    """
    if kwargs.get("encoding") not in [None, "utf-8"]:
        warnings.warn(
            f"Hextuples files are always utf-8 encoded, "
            f"I was passed: {kwargs.get('encoding')}, "
            "but I'm still going to use utf-8"
        )

    assert (
        graph.store.context_aware
    ), "Hextuples Parser needs a context-aware store!"

    self.skolemize = skolemize

    # default_union=True mimics the behaviour of the old ConjunctiveGraph.
    dataset = Dataset(store=graph.store, default_union=True)
    initial_default = dataset.default_context  # the DEFAULT_DATASET_GRAPH_ID

    # Work out which graph should act as the default context for tuples
    # that carry no graph name.
    if isinstance(graph, (Dataset, ConjunctiveGraph)):
        self.default_context = graph.default_context
    elif graph.identifier is not None:
        self.default_context = (
            graph
            if graph.identifier == initial_default.identifier
            else dataset.get_context(graph.identifier)
        )
    else:
        # mypy thinks this is unreachable, but graph.identifier can be None
        self.default_context = initial_default  # type: ignore[unreachable]

    if self.default_context is not initial_default:
        dataset.default_context = self.default_context
        # The original default graph is no longer needed; drop it.
        dataset.remove_graph(initial_default)

    # Obtain whichever stream(s) the source can provide.
    char_stream: Optional[TextIO]
    try:
        char_stream = source.getCharacterStream()
    except (AttributeError, LookupError):
        char_stream = None

    byte_stream: Optional[BinaryIO]
    try:
        byte_stream = source.getByteStream()
    except (AttributeError, LookupError):
        byte_stream = None

    if char_stream is None and byte_stream is None:
        raise ValueError(
            f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
        )
    if TYPE_CHECKING:
        assert char_stream is not None or byte_stream is not None

    use_stream: Union[TextIO, BinaryIO]
    if _HAS_ORJSON:
        loads = orjson.loads
        # orjson consumes bytes directly, so prefer the raw byte stream.
        if byte_stream is not None:
            use_stream = byte_stream
        else:
            if TYPE_CHECKING:
                assert isinstance(char_stream, TextIOWrapper)
            use_stream = char_stream
    else:
        loads = json.loads
        # The stdlib json module wants text; decode a byte stream if needed.
        if char_stream is not None:
            use_stream = char_stream
        else:
            if TYPE_CHECKING:
                assert isinstance(byte_stream, BufferedReader)
            use_stream = TextIOWrapper(byte_stream, encoding="utf-8")

    for line in use_stream:  # type: Union[str, bytes]
        if len(line) == 0 or line.isspace():
            # Skipping empty lines because this is what was being done before for the first and last lines, albeit in an rather indirect way.
            # The result is that we accept input that would otherwise be invalid.
            # Possibly we should just let this result in an error.
            continue
        # The 'value' slot (index 2) is allowed to be "" but not None,
        # whereas "" in every other slot means "absent" and becomes None.
        fields: List[str] = loads(line)
        hex_tuple = [field if field != "" else None for field in fields]
        if fields[2] == "":
            hex_tuple[2] = ""
        self._parse_hextuple(dataset, hex_tuple)