Skip to content

jsonld

This parser will interpret a JSON-LD document as an RDF Graph. See http://json-ld.org/

Example
>>> from rdflib import Graph, URIRef, Literal
>>> test_json = '''
... {
...     "@context": {
...         "dc": "http://purl.org/dc/terms/",
...         "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
...         "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
...     },
...     "@id": "http://example.org/about",
...     "dc:title": {
...         "@language": "en",
...         "@value": "Someone's Homepage"
...     }
... }
... '''
>>> g = Graph().parse(data=test_json, format='json-ld')
>>> list(g) == [(URIRef('http://example.org/about'),
...     URIRef('http://purl.org/dc/terms/title'),
...     Literal("Someone's Homepage", lang='en'))]
True

Classes:

Functions:

ALLOW_LISTS_OF_LISTS module-attribute

ALLOW_LISTS_OF_LISTS = True

TYPE_TERM module-attribute

TYPE_TERM = Term(str(type), TYPE, VOCAB)

__all__ module-attribute

__all__ = ['JsonLDParser', 'to_rdf']

JsonLDParser

JsonLDParser()

Bases: Parser

Methods:

  • parse

    Parse JSON-LD from a source document.

Source code in rdflib/plugins/parsers/jsonld.py
def __init__(self):
    """Create the parser, delegating all setup to the parent class initializer."""
    super().__init__()

parse

parse(source: InputSource, sink: Graph, version: float = 1.1, skolemize: bool = False, encoding: Optional[str] = 'utf-8', base: Optional[str] = None, context: Optional[Union[List[Union[Dict[str, Any], str, None]], Dict[str, Any], str]] = None, generalized_rdf: Optional[bool] = False, extract_all_scripts: Optional[bool] = False, **kwargs: Any) -> None

Parse JSON-LD from a source document.

The source document can be JSON or HTML with embedded JSON script elements (type attribute = application/ld+json). To process the source as HTML, source.content_type must be set to “text/html” or “application/xhtml+xml”.

Parameters:

  • source

    (InputSource) –

    InputSource with JSON-formatted data (JSON or HTML)

  • sink

    (Graph) –

    Graph to receive the parsed triples

  • version

    (float, default: 1.1 ) –

    parse as JSON-LD version, defaults to 1.1

  • skolemize

    (bool, default: False ) –

    whether to skolemize blank nodes, defaults to False

  • encoding

    (Optional[str], default: 'utf-8' ) –

    character encoding of the JSON (should be “utf-8” or “utf-16”; any other value triggers a warning)

  • base

    (Optional[str], default: None ) –

    JSON-LD Base IRI, defaults to None

  • context

    (Optional[Union[List[Union[Dict[str, Any], str, None]], Dict[str, Any], str]], default: None ) –

    JSON-LD Context, defaults to None

  • generalized_rdf

    (Optional[bool], default: False ) –

    parse as Generalized RDF, defaults to False

  • extract_all_scripts

    (Optional[bool], default: False ) –

    if source is an HTML document then extract all script elements; defaults to False (extract only the first script element). This is ignored if source.system_id contains a fragment identifier, in which case only the script element with matching id attribute is extracted.

Source code in rdflib/plugins/parsers/jsonld.py
def parse(
    self,
    source: InputSource,
    sink: Graph,
    version: float = 1.1,
    skolemize: bool = False,
    encoding: Optional[str] = "utf-8",
    base: Optional[str] = None,
    context: Optional[
        Union[
            List[Union[Dict[str, Any], str, None]],
            Dict[str, Any],
            str,
        ]
    ] = None,
    generalized_rdf: Optional[bool] = False,
    extract_all_scripts: Optional[bool] = False,
    **kwargs: Any,
) -> None:
    """Parse JSON-LD from a source document.

    The source document can be JSON or HTML with embedded JSON script
    elements (type attribute = `application/ld+json`). To process the source
    as HTML, `source.content_type` must be set to `text/html` or
    `application/xhtml+xml`.

    Args:
        source: InputSource with JSON-formatted data (JSON or HTML)
        sink: Graph to receive the parsed triples
        version: parse as JSON-LD version, defaults to 1.1. A value that
            cannot be converted to `float` falls back to 1.1.
        skolemize: whether to skolemize blank nodes, defaults to False
        encoding: character encoding of the JSON (should be "utf-8" or
            "utf-16"; any other value only triggers a warning)
        base: JSON-LD [Base IRI](https://www.w3.org/TR/json-ld/#base-iri), defaults to None.
            When not given, it is derived from the source's public or
            system id, absolutized by `sink`.
        context: JSON-LD [Context](https://www.w3.org/TR/json-ld/#the-context), defaults to None
        generalized_rdf: parse as [Generalized RDF](https://www.w3.org/TR/json-ld/#relationship-to-rdf), defaults to False
        extract_all_scripts: if source is an HTML document then extract
            all script elements; defaults to False (extract only the first
            script element). This is ignored if `source.system_id` contains
            a fragment identifier, in which case only the script element with
            matching id attribute is extracted.
        **kwargs: accepted for call compatibility; not used by this method.
    """
    if encoding not in ("utf-8", "utf-16"):
        warnings.warn(
            "JSON should be encoded as unicode. "
            "Given encoding was: %s" % encoding
        )

    if not base:
        base = sink.absolutize(source.getPublicId() or source.getSystemId() or "")

    context_data = context
    # Only sources exposing both `url` and `links` can supply a context
    # of their own when the caller did not pass one.
    if not context_data and hasattr(source, "url") and hasattr(source, "links"):
        if TYPE_CHECKING:
            assert isinstance(source, URLInputSource)
        context_data = context_from_urlinputsource(source)

    try:
        version = float(version)
    except (TypeError, ValueError):
        # float() raises ValueError for unparsable strings but TypeError for
        # None and other non-numeric objects; both fall back to the default.
        version = 1.1

    # Get the optional fragment identifier
    try:
        fragment_id = URIRef(source.getSystemId()).fragment
    except Exception:
        # Best effort: a missing or unusable system id simply means no fragment.
        fragment_id = None

    data, html_base = source_to_json(source, fragment_id, extract_all_scripts)
    if html_base is not None:
        # A base found in the HTML is resolved against the current base.
        base = URIRef(html_base, base=base)

    # NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be
    # context_aware. Keeping this check in case RDFLib is changed, or
    # someone passes something context_aware to this parser directly.
    conj_sink: Graph
    if not sink.context_aware:
        conj_sink = ConjunctiveGraph(store=sink.store, identifier=sink.identifier)
    else:
        conj_sink = sink

    to_rdf(
        data,
        conj_sink,
        base,
        context_data,
        version,
        bool(generalized_rdf),
        skolemize=skolemize,
    )

Parser

Parser(generalized_rdf: bool = False, allow_lists_of_lists: Optional[bool] = None, skolemize: bool = False)

Methods:

Attributes:

Source code in rdflib/plugins/parsers/jsonld.py
def __init__(
    self,
    generalized_rdf: bool = False,
    allow_lists_of_lists: Optional[bool] = None,
    skolemize: bool = False,
):
    """Store the parsing options on the instance.

    Args:
        generalized_rdf: stored as `self.generalized_rdf`.
        allow_lists_of_lists: stored as `self.allow_lists_of_lists`; when
            None, the module-level `ALLOW_LISTS_OF_LISTS` value is used.
        skolemize: stored as `self.skolemize`.
    """
    # Resolve the module-level default only when the caller gave no value;
    # an explicit False must be kept as False.
    if allow_lists_of_lists is None:
        allow_lists_of_lists = ALLOW_LISTS_OF_LISTS

    self.skolemize = skolemize
    self.generalized_rdf = generalized_rdf
    self.allow_lists_of_lists = allow_lists_of_lists
    # Starts empty; keyed by string, holding BNode values.
    self.invalid_uri_to_bnode: dict[str, BNode] = {}

allow_lists_of_lists instance-attribute

allow_lists_of_lists = allow_lists_of_lists if allow_lists_of_lists is not None else ALLOW_LISTS_OF_LISTS

generalized_rdf instance-attribute

generalized_rdf = generalized_rdf

invalid_uri_to_bnode instance-attribute

invalid_uri_to_bnode: dict[str, BNode] = {}

skolemize instance-attribute

skolemize = skolemize

parse

parse(data: Any, context: Context, dataset: Graph) -> Graph
Source code in rdflib/plugins/parsers/jsonld.py
def parse(self, data: Any, context: Context, dataset: Graph) -> Graph:
    """Add the nodes found in `data` to `dataset`.

    Args:
        data: decoded JSON; either a list of nodes or a single dict node.
        context: context used for the conversion. A truthy value under the
            `CONTEXT` key of a dict `data` is loaded into it first.
        dataset: graph that receives prefix bindings and the parsed nodes.

    Returns:
        `dataset.default_context` when `dataset` is context aware,
        otherwise `dataset` itself.
    """
    # NOTE(review): `data` that is neither a list nor a dict leaves
    # `resources` unassigned, so the loop at the end raises UnboundLocalError.
    resources: List[Any]
    topcontext = False
    if isinstance(data, list):
        resources = data
    elif isinstance(data, dict):
        local_context = data.get(CONTEXT)
        if local_context:
            context.load(local_context, context.base)
            topcontext = True
        # A single top-level node is handled as a one-element list.
        resources = [data]

    # Expose the vocabulary as the default (None) prefix.
    if context.vocab:
        dataset.bind(None, context.vocab)
    # Bind every term whose id ends with one of VOCAB_DELIMS as a prefix.
    for prefix, term in context.terms.items():
        namespace = term.id
        if namespace and namespace.endswith(VOCAB_DELIMS):
            dataset.bind(prefix, namespace)

    if dataset.context_aware:
        # type error: "Graph" has no attribute "default_context"
        graph = dataset.default_context  # type: ignore[attr-defined]
    else:
        graph = dataset

    for node in resources:
        self._add_to_graph(dataset, graph, context, node, topcontext)

    return graph

to_rdf

to_rdf(data: Any, dataset: Graph, base: Optional[str] = None, context_data: Optional[Union[List[Union[Dict[str, Any], str, None]], Dict[str, Any], str]] = None, version: Optional[float] = None, generalized_rdf: bool = False, allow_lists_of_lists: Optional[bool] = None, skolemize: bool = False)
Source code in rdflib/plugins/parsers/jsonld.py
def to_rdf(
    data: Any,
    dataset: Graph,
    base: Optional[str] = None,
    context_data: Optional[
        Union[
            List[Union[Dict[str, Any], str, None]],
            Dict[str, Any],
            str,
        ]
    ] = None,
    version: Optional[float] = None,
    generalized_rdf: bool = False,
    allow_lists_of_lists: Optional[bool] = None,
    skolemize: bool = False,
):
    """Convert decoded JSON-LD `data` into RDF inside `dataset`.

    Args:
        data: decoded JSON-LD content handed to `Parser.parse`.
        dataset: graph that receives the result.
        base: passed to `Context` as its base.
        context_data: loaded into the new `Context` when truthy.
        version: passed to `Context` as its version.
        generalized_rdf: forwarded to `Parser`.
        allow_lists_of_lists: forwarded to `Parser`.
        skolemize: forwarded to `Parser`.

    Returns:
        Whatever `Parser.parse` returns for `data`, the new context and
        `dataset`.
    """
    active_context = Context(base=base, version=version)
    if context_data:
        active_context.load(context_data)

    return Parser(
        generalized_rdf=generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
        skolemize=skolemize,
    ).parse(data, active_context, dataset)