Skip to content

Check

Check site.

check(options)

Check the site.

Source code in mccole/check.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def check(options):
    """Run every site check against the generated HTML under ``options.dst``.

    Each ``index.html`` found beneath the destination directory is parsed once
    and the resulting documents are shared by all HTML checks.  The individual
    checks report problems themselves; this function returns nothing.
    """
    site_root = Path(options.dst)

    # Parse each generated page once, keyed by its path.
    html_paths = list(site_root.glob("**/index.html"))
    pages = {}
    for html_path in html_paths:
        markup = html_path.read_text(encoding="utf-8")
        pages[html_path] = BeautifulSoup(markup, "html.parser")

    # Source-level check (reads the Markdown files, not the parsed pages).
    _check_tabs_in_markdown(options)

    # Checks that look at the site as a whole.
    _check_all_html(options, pages)
    _check_glossary_redefinitions(pages)

    _check_bibliography_alphabetical(options, pages)
    _check_bibliography_key_mismatch(options, pages)
    _check_bibliography_bare_isbns(options, pages)
    _check_glossary_alphabetical(options, pages)
    for kind in ("bibliography", "glossary"):
        _check_cross_references(options, pages, kind)
        _check_unused_crossref_definitions(options, pages, kind)

    # Checks applied to one page at a time; each check visits every page
    # before the next check starts, so messages are grouped by check.
    per_page_checks = (
        _check_empty_inclusions,
        _check_figure_structure,
        _check_single_h1,
        _check_table_structure,
        _check_unknown_links,
    )
    for page_check in per_page_checks:
        for path, doc in pages.items():
            page_check(options, path, doc)

_check_all_html(options, pages)

Validate generated HTML.

Source code in mccole/check.py
55
56
57
58
59
def _check_all_html(options, pages):
    """Run the HTML validator over the paths of all generated pages.

    When ``options.relaxed`` is truthy, ``DIV_IN_SUMMARY`` is placed in the
    validator's ignore list; otherwise nothing is ignored.
    """
    if options.relaxed:
        ignored = [DIV_IN_SUMMARY]
    else:
        ignored = []
    page_paths = list(pages)
    Validator(ignore=ignored).validate(page_paths)

_check_bibliography_alphabetical(options, pages)

Check that bibliography keys are in alphabetical order.

Source code in mccole/check.py
62
63
64
65
66
67
68
def _check_bibliography_alphabetical(options, pages):
    """Report bibliography keys that sort before the key preceding them.

    Keys are compared as-is (case-sensitively) in the order in which
    `_get_crossref_definitions` returns them.
    """
    keys = _get_crossref_definitions(options, pages, "bibliography")
    # Walk adjacent pairs: each key must not sort before its predecessor.
    for earlier, later in zip(keys, keys[1:]):
        in_order = later >= earlier
        _require("bibliography", in_order, f"out-of-order key {later}")

_check_glossary_alphabetical(options, pages)

Check that glossary terms are in alphabetical order by lower-case term text.

Source code in mccole/check.py
71
72
73
74
75
76
77
78
79
def _check_glossary_alphabetical(options, pages):
    """Report glossary terms that are out of order, ignoring case.

    Terms are the ``<dt>`` texts returned by `_get_glossary_term_texts`;
    adjacent terms are compared after lower-casing both.
    """
    term_texts = _get_glossary_term_texts(options, pages)
    for earlier, later in zip(term_texts, term_texts[1:]):
        in_order = later.lower() >= earlier.lower()
        _require("glossary", in_order, f"out-of-order term '{later}'")

_check_cross_references(options, pages, kind)

Check that all cross-references match entries.

Source code in mccole/check.py
82
83
84
85
86
87
88
89
90
91
def _check_cross_references(options, pages, kind):
    """Report links into the `kind` page whose fragment is not a defined key.

    A link is considered a cross-reference when its ``href`` contains
    ``/<kind>/#``; the key is whatever follows the last ``#``.
    """
    defined = set(_get_crossref_definitions(options, pages, kind))
    marker = f"/{kind}/#"
    for path, doc in pages.items():
        targets = (link["href"] for link in doc.select("a[href]"))
        for target in targets:
            if marker in target:
                key = target.rpartition("#")[2]
                _require(path, key in defined, f"unknown {kind} key {key}")

_check_element_structure(filepath, doc, selector, kind, caption_selector, pattern)

Check that figure-like elements have IDs and captions.

Source code in mccole/check.py
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def _check_element_structure(filepath, doc, selector, kind, caption_selector, pattern):
    """Check the ``id`` and caption of every element matching `selector`.

    For each matching element, in order:

    1. it must have an ``id`` attribute;
    2. exactly one descendant must match `caption_selector`;
    3. that caption's text must match `pattern` (via ``pattern.match``).

    The first failing step is reported for the element and the remaining
    steps are skipped.  `kind` is only used to label the messages.
    """
    for element in doc.select(selector):
        has_id = "id" in element.attrs
        if not _require(filepath, has_id, f"{kind} missing 'id'"):
            continue

        found = element.select(caption_selector)
        exactly_one = len(found) == 1
        if _require(filepath, exactly_one, f"missing/extra {kind} caption(s)"):
            caption_text = found[0].get_text()
            _require(
                filepath,
                pattern.match(caption_text),
                f"badly-formatted {kind} caption '{caption_text}'",
            )

_check_empty_inclusions(options, filepath, doc)

Report %inc inclusions whose generated content is only whitespace.

Source code in mccole/check.py
110
111
112
113
114
115
116
117
def _check_empty_inclusions(options, filepath, doc):
    """Report ``div[data-inc]`` blocks whose ``<pre>`` holds only whitespace.

    The inclusion is named in the message by the ``title`` of its
    ``span.inc-path`` child when there is one, otherwise by the block's
    ``data-inc`` attribute.  Blocks without a ``<pre>`` are not reported.
    """
    for block in doc.select("div[data-inc]"):
        label = block.find("span", class_="inc-path")
        if label:
            source = label["title"]
        else:
            source = block.get("data-inc", "unknown")

        listing = block.find("pre")
        if listing is None or listing.get_text().strip():
            continue
        _require(filepath, False, f"empty inclusion of {source}")

_check_figure_structure(options, filepath, doc)

Check that all figures have IDs and captions.

Source code in mccole/check.py
120
121
122
123
124
def _check_figure_structure(options, filepath, doc):
    """Check the ``id`` and ``<figcaption>`` of every ``<figure>`` on the page.

    Delegates to `_check_element_structure`, matching caption text against
    ``RE_FIGURE_CAPTION``.
    """
    _check_element_structure(
        filepath,
        doc,
        selector="figure",
        kind="figure",
        caption_selector="figcaption",
        pattern=RE_FIGURE_CAPTION,
    )

_check_glossary_redefinitions(pages)

Check for glossary terms that are defined more than once.

Source code in mccole/check.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def _check_glossary_redefinitions(pages):
    """Report glossary keys that are defined more than once across the site.

    Every link whose ``href`` contains ``/glossary/#`` and whose classes do
    not include ``term-defined`` is recorded against its key (the text after
    the last ``#``).  A key recorded more than once is reported along with
    the sorted paths of the pages involved.
    """
    locations = defaultdict(list)
    for path, doc in pages.items():
        for link in doc.select("a[href]"):
            target = link["href"]
            if "/glossary/#" not in target:
                continue
            if "term-defined" in link.get("class", []):
                continue
            locations[target.split("#")[-1]].append(path)

    for key, where in locations.items():
        listing = ", ".join(sorted(str(place) for place in where))
        _require(
            GLOBAL,
            len(where) == 1,
            f"glossary entry '{key}' defined in {listing}",
        )

_check_single_h1(options, filepath, doc)

Check that all pages have a single H1.

Source code in mccole/check.py
145
146
147
148
def _check_single_h1(options, filepath, doc):
    """Check that the page contains exactly one ``<h1>`` element.

    `options` is unused; it is accepted so that every per-page check has the
    same ``(options, filepath, doc)`` signature.
    """
    h1_count = len(doc.find_all("h1"))
    # `_require` already prefixes the message with the file path, so the
    # message itself must not repeat it (or start with a stray space).
    _require(filepath, h1_count == 1, f"has {h1_count} H1 elements")

_check_table_structure(options, filepath, doc)

Check that all tables have proper structure and IDs.

Source code in mccole/check.py
151
152
153
154
155
def _check_table_structure(options, filepath, doc):
    """Check the ``id`` and ``<caption>`` of every ``div[data-caption]``.

    Delegates to `_check_element_structure`, matching caption text against
    ``RE_TABLE_CAPTION``.
    """
    _check_element_structure(
        filepath,
        doc,
        selector="div[data-caption]",
        kind="table",
        caption_selector="caption",
        pattern=RE_TABLE_CAPTION,
    )

_check_unknown_links(options, filepath, doc)

Look for unresolved Markdown links.

Source code in mccole/check.py
158
159
160
161
162
163
164
165
166
def _check_unknown_links(options, filepath, doc):
    """Report text containing ``][`` that is not inside ``<code>`` or ``<pre>``.

    A ``][`` left in the rendered text suggests a reference-style Markdown
    link that was never resolved; occurrences within code are accepted.
    """
    literal_tags = {"code", "pre"}

    def has_link_remnant(s):
        return s and "][" in s

    for fragment in doc.find_all(string=has_link_remnant):
        inside_literal = any(
            ancestor.name in literal_tags for ancestor in fragment.parents
        )
        _require(
            filepath,
            inside_literal,
            f"possible unresolved Markdown link '{fragment}'",
        )

_check_tabs_in_markdown(options)

Report tab characters in Markdown source files.

Source code in mccole/check.py
169
170
171
172
173
174
175
176
177
178
179
180
181
def _check_tabs_in_markdown(options):
    """Report every line of the Markdown sources that contains a tab.

    The files examined are ``options.src / options.root`` plus the
    ``"filepath"`` of each entry returned by ``util.load_order``; they are
    visited in sorted order and each offending line is reported as
    ``<path>:<line number>``.
    """
    order = util.load_order(options.src, options.root)
    sources = [
        options.src / options.root,
        *(entry["filepath"] for entry in order.values()),
    ]
    for source in sorted(sources):
        lines = source.read_text(encoding="utf-8").splitlines()
        for number, line in enumerate(lines, start=1):
            if "\t" not in line:
                continue
            _require(f"{source}:{number}", False, "tab character in Markdown source")

_get_glossary_term_texts(options, pages)

Get glossary term texts (not IDs) in document order.

Source code in mccole/check.py
184
185
186
187
188
189
190
def _get_glossary_term_texts(options, pages):
    """Return the stripped text of each glossary ``<dt>``, in document order.

    The glossary page is ``<options.dst>/glossary/index.html``.  If it is not
    among `pages`, the problem is reported and an empty list is returned.
    """
    glossary_path = Path(options.dst, "glossary", "index.html")
    found = _require(
        GLOBAL, glossary_path in pages, f"glossary {glossary_path} not found"
    )
    if not found:
        return []

    texts = []
    for term in pages[glossary_path].find_all("dt"):
        texts.append(term.get_text().strip())
    return texts

_get_crossref_definitions(options, pages, kind)

Get the list of known cross-reference keys, in document order.

Source code in mccole/check.py
193
194
195
196
197
198
199
def _get_crossref_definitions(options, pages, kind):
    """Get the list of known cross-reference keys, in document order.

    The keys are the ``id`` attributes of the first id-bearing ``<span>``
    inside each ``<dt>`` of ``<options.dst>/<kind>/index.html``.  A list (not
    a set) is returned because callers that check alphabetical order depend
    on the document order; callers that only need membership wrap it in
    ``set()`` themselves.

    If the page is not among `pages`, the problem is reported and an empty
    list is returned.  A ``<dt>`` without an id-bearing ``<span>`` is reported
    and skipped instead of raising.
    """
    path = Path(options.dst, kind, "index.html")
    if not _require(GLOBAL, path in pages, f"{kind} {path} not found"):
        return []

    keys = []
    for entry in pages[path].find_all("dt"):
        # Only accept a span that actually carries an id, so a malformed
        # entry yields a diagnostic rather than AttributeError/KeyError.
        span = entry.find("span", id=True)
        if _require(path, span is not None, f"{kind} entry missing span with 'id'"):
            keys.append(span["id"])
    return keys

_get_crossref_usage(pages, kind)

Get the set of referenced keys for one cross-reference kind.

Source code in mccole/check.py
202
203
204
205
206
207
208
209
210
211
212
213
214
def _get_crossref_usage(pages, kind):
    """Collect the keys referenced by links into the `kind` page.

    A link counts when its ``href`` contains ``/<kind>/#``; the key is the
    text after the last ``#``.  For the glossary, links carrying the
    ``term-defined`` class are left out.
    """
    marker = f"/{kind}/#"
    is_glossary = kind == "glossary"
    referenced = set()
    for doc in pages.values():
        for link in doc.select("a[href]"):
            target = link["href"]
            if marker in target:
                excluded = is_glossary and "term-defined" in link.get("class", [])
                if not excluded:
                    referenced.add(target.rpartition("#")[2])
    return referenced

_check_bibliography_key_mismatch(options, pages)

Check that each bibliography span id matches its text content.

Source code in mccole/check.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def _check_bibliography_key_mismatch(options, pages):
    """Report bibliography entries whose span ``id`` differs from its text.

    Looks at the first id-bearing ``<span>`` inside each ``<dt>`` of
    ``<options.dst>/bibliography/index.html`` and compares the ``id`` with
    the span's stripped text.  Entries without such a span are skipped.  If
    the page is not among `pages`, that is reported and nothing else is done.
    """
    bib_path = Path(options.dst, "bibliography", "index.html")
    if not _require(GLOBAL, bib_path in pages, f"bibliography {bib_path} not found"):
        return

    for entry in pages[bib_path].find_all("dt"):
        key_span = entry.find("span", id=True)
        if key_span is not None:
            declared = key_span["id"]
            shown = key_span.get_text().strip()
            _require(
                "bibliography",
                declared == shown,
                f"key mismatch: id='{declared}' text='{shown}'",
            )

_check_bibliography_bare_isbns(options, pages)

Warn about ISBN strings in bibliography definitions that are not hyperlinked.

Source code in mccole/check.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def _check_bibliography_bare_isbns(options, pages):
    """Warn on stderr about ISBNs in bibliography entries that are not links.

    For each ``<dd>`` of ``<options.dst>/bibliography/index.html``, the text
    that is not inside any ``<a>`` is joined together and searched with
    ``RE_BARE_ISBN``; every match produces one warning line.  Nothing happens
    (and nothing is reported) if the page is not among `pages`.
    """
    bib_path = Path(options.dst, "bibliography", "index.html")
    if bib_path in pages:
        for definition in pages[bib_path].find_all("dd"):
            unlinked = []
            for piece in definition.strings:
                inside_link = any(
                    ancestor.name == "a" for ancestor in piece.parents
                )
                if not inside_link:
                    unlinked.append(str(piece))
            for found in RE_BARE_ISBN.finditer("".join(unlinked)):
                print(
                    f"warning: bibliography: bare ISBN '{found.group()}'",
                    file=sys.stderr,
                )

_check_unused_crossref_definitions(options, pages, kind)

Report defined cross-reference entries that are never referenced.

Source code in mccole/check.py
257
258
259
260
261
262
def _check_unused_crossref_definitions(options, pages, kind):
    """Report keys defined on the `kind` page that no link refers to.

    Defined keys come from `_get_crossref_definitions`, referenced keys from
    `_get_crossref_usage`; the unused ones are reported in sorted order.
    """
    defined = set(_get_crossref_definitions(options, pages, kind))
    referenced = _get_crossref_usage(pages, kind)
    unused = sorted(defined.difference(referenced))
    for key in unused:
        _require(GLOBAL, False, f"unused {kind} key {key}")

_require(filepath, condition, message)

Print a warning to standard error when a condition fails, and return the condition.

Source code in mccole/check.py
265
266
267
268
269
def _require(filepath, condition, message):
    """Warn on stderr when `condition` is falsy, and hand `condition` back.

    The warning has the form ``<filepath>: <message>``.  The original
    `condition` object (not a coerced bool) is returned so callers can write
    ``if not _require(...): ...``.
    """
    if condition:
        return condition
    print(f"{filepath}: {message}", file=sys.stderr)
    return condition