diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index e062db7c95..769768567a 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -138,7 +138,7 @@ jobs: go-version: ${{ env.GO_VERSION }} - name: Install system dependencies run: | - sudo apt-get update && sudo apt-get -y install moreutils hunspell hunspell-en-gb hunspell-en-us pandoc + sudo apt-get update && sudo apt-get -y install moreutils - name: Install open-policy-agent run: | cd "${GOPATH}/src/github.com/${GITHUB_REPOSITORY}" diff --git a/docs/Documentation-Requirements.md b/docs/Documentation-Requirements.md index 13f143a5fd..4f6b66acae 100644 --- a/docs/Documentation-Requirements.md +++ b/docs/Documentation-Requirements.md @@ -188,15 +188,14 @@ and compare them with standard tools (e.g. `diff(1)`). # Spelling Since this project uses a number of terms not found in conventional -dictionaries, we have a -[spell checking tool](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-spelling) -that checks both dictionary words and the additional terms we use. +dictionaries, we have a [kata-dictionary](../tests/spellcheck/kata-dictionary.txt) +that contains some project-specific terms we use. -Run the spell checking tool on your document before raising a PR to ensure it +You can run the `cspell` spell checking tool on your document before raising a PR to ensure it is free of mistakes. If your document introduces new terms, you need to update the custom -dictionary used by the spell checking tool to incorporate the new words. +dictionary to incorporate the new words. # Names diff --git a/docs/code-pr-advice.md b/docs/code-pr-advice.md index 3360c749f1..8dd0138725 100644 --- a/docs/code-pr-advice.md +++ b/docs/code-pr-advice.md @@ -231,12 +231,6 @@ Run the [markdown checker](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-markdown) on your documentation changes. 
-### Spell check - -Run the -[spell checker](https://github.com/kata-containers/kata-containers/tree/main/tests/cmd/check-spelling) -on your documentation changes. - ## Finally You may wish to read the documentation that the diff --git a/tests/cmd/check-spelling/README.md b/tests/cmd/check-spelling/README.md deleted file mode 100644 index d4498756b8..0000000000 --- a/tests/cmd/check-spelling/README.md +++ /dev/null @@ -1,178 +0,0 @@ -# Spell check tool - -## Overview - -The `kata-spell-check.sh` tool is used to check a markdown file for -typographical (spelling) mistakes. - -## Approach - -The spell check tool is based on -[`hunspell`](https://github.com/hunspell/hunspell). It uses standard Hunspell -English dictionaries and supplements these with a custom Hunspell dictionary. -The document is cleaned of several entities before the spell-check begins. -These entities include the following: - -- URLs -- Email addresses -- Code blocks -- Most punctuation -- GitHub user ids - -## Custom words - -A custom dictionary is required to accept specific words that are either well -understood by the community or are defined in various document files, but do -not appear in standard dictionaries. The custom dictionaries allow those words -to be accepted as correct. The following lists common examples of such words: - -- Abbreviations -- Acronyms -- Company names -- Product names -- Project names -- Technical terms - -## Spell check a document file - -```sh -$ ./kata-spell-check.sh check /path/to/file -``` - -> **Note:** If you have made local edits to the dictionaries, you may -> [re-create the master dictionary files](#create-the-master-dictionary-files) -> as documented in the [Adding a new word](#adding-a-new-word) section, -> in order for your local edits take effect. 
- -## Other options - -Lists all available options and commands: - -```sh -$ ./kata-spell-check.sh -h -``` - -## Technical details - -### Hunspell dictionary format - -A Hunspell dictionary comprises two text files: - -- A word list file - - This file defines a list of words (one per line). The list includes optional - references to one or more rules defined in the rules file as well as optional - comments. Specify fixed words (e.g. company names) verbatim. Enter “normal” - words in their root form. - - The root form of a "normal" word is the simplest and shortest form of that - word. For example, the following list of words are all formed from the root - word "computer": - - - Computers - - Computer’s - - Computing - - Computed - - Each word in the previous list is an example of using the word "computer" to - construct said word through a combination of applying the following - manipulations: - - - Remove one or more characters from the end of the word. - - Add a new ending. - - Therefore, you list the root word "computer" in the word list file. - -- A rules file - - This file defines named manipulations to apply to root words to form new - words. For example, rules that make a root word plural. - -### Source files - -The rules file and the the word list file for the custom dictionary generate -from "source" fragment files in the [`data`](data/) directory. - -All the fragment files allow comments using the hash (`#`) comment -symbol and all files contain a comment header explaining their content. - -#### Word list file fragments - -The `*.txt` files are word list file fragments. Splitting the word list -into fragments makes updates easier and clearer as each fragment is a -grouping of related terms. The name of the file gives a clue as to the -contents but the comments at the top of each file provide further -detail. - -Every line that does not start with a comment symbol contains a single -word. 
An optional comment for a word may appear after the word and is -separated from the word by whitespace followed by the comment symbol: - -``` -word # This is a comment explaining this particular word list entry. -``` - -You *may* suffix each word by a forward slash followed by one or more -upper-case letters. Each letter refers to a rule name in the rules file: - -``` -word/AC # This word references the 'A' and 'C' rules. -``` - -#### Rules file - -The [rules file](data/rules.aff) contains a set of general rules that can be -applied to one or more root words in the word list files. You can make -comments in the rules file. - -For an explanation of the format of this file see -[`man 5 hunspell`](http://www.manpagez.com/man/5/hunspell) -([source](https://github.com/hunspell/hunspell/blob/master/man/hunspell.5)). - -## Adding a new word - -### Update the word list fragment - -If you want to allow a new word to the dictionary, - -- Check to ensure you do need to add the word - - Is the word valid and correct? If the word is a project, product, - or company name, is the capitalization correct? - -- Add the new word to the appropriate [word list fragment file](data). - - Specifically, if it is a general word, add the *root* of the word to - the appropriate fragment file. - -- Add a `/` suffix along with the letters for each rule to apply in order to - add rules references. - -### Optionally update the rules file - -It should not generally be necessary to update the rules file since it -already contains rules for most scenarios. However, if you need to -update the file, [read the documentation carefully](#rules-file). - -### Create the master dictionary files - -Every time you change the dictionary files you must recreate the master -dictionary files: - -```sh -$ ./kata-spell-check.sh make-dict -``` - -As a convenience, [checking a file](#spell-check-a-document-file) will -automatically create the database. 
- -### Test the changes - -You must test any changes to the [word list file -fragments](#word-list-file-fragments) or the [rules file](#rules-file) -by doing the following: - -1. Recreate the [master dictionary files](#create-the-master-dictionary-files). - -1. [Run the spell checker](#spell-check-a-document-file) on a file containing the - words you have added to the dictionary. diff --git a/tests/cmd/check-spelling/data/acronyms.txt b/tests/cmd/check-spelling/data/acronyms.txt deleted file mode 100644 index c91d4eb434..0000000000 --- a/tests/cmd/check-spelling/data/acronyms.txt +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: List of acronyms and abbreviations. - -ACPI/AB -acpi -ACS/AB -API/AB -api/AB # when used in links like "api.github.com" -AUFS # Another Union FS -AWS/AB -BDF/AB -CFS/AB -ci/AB -CLI/AB -CNI/AB -CNM/AB -CPUID/AB -CRI/AB -CVE/AB -DAX/AB -DinD/B # Docker in Docker -dind/B -DMA/AB -DPDK/AB -enum/A -FaaS/B # Function as a Service -FS/AB -fs/B # For terms like "virtio-fs" -GCE/AB -GOPATH/AB -GPG/AB -GPU/AB -gRPC/AB -GSC/AB -GVT/AB -IaaS/B # Infrastructure as a Service -io/B -IOMMU/AB -IoT/AB # Internet of Things -IOV/AB -JSON/AB -k8s/B -KCSA/AB -KSM/AB -KVM/AB -LTS/AB -MACVTAP/AB -mem/B # For terms like "virtio-mem" -memdisk/B -MDEV/AB -NEMU/AB -NFD/AB # Node Feature Discovery -NIC/AB -nodeSelector/B # Kubernetes RuntimeClass scheduling field -nodeSelectors/B -nv/AB # NVIDIA abbreviation (lowercase) -NVDIMM/AB -OCI/AB -OVMF/AB -OverlayFS/B -PaaS/B # Platform as a Service -PCDIMM/AB -PCI/AB -PCIe/AB -PID/AB -pmem/B # persistent memory -PNG/AB -POD/AB -PR/AB -PSS/AB -QA/AB -QAT/AB -QEMU/AB -RBAC/AB -RDMA/AB -RNG/AB -RuntimeClass/B # Kubernetes resource (node.k8s.io) -RuntimeClasses/B -SaaS/B # Software as a Service -SCSI/AB -SDK/AB -seccomp # secure computing mode -SHA/AB -SEL/AB # IBM Secure Execution for Linux -SPDX/AB -SRIOV/AB -SEV-SNP/B # AMD Secure Encrypted 
Virtualization - Secure Nested Paging -SVG/AB -TBD/AB -TEE/AB # Trusted Execution Environment -TOC/AB -TOML/AB -TTY/AB -UI/AB -UTS/AB -UUID/AB -util/A -vCPU/AB -VETH/AB -VF/AB -VFIO/AB -vfio/AB # For terms like "vfio-pci" -VGPU/AB -vhost/AB -VHOST/AB -virtio/AB -VirtIO/AB -Virtio-fs/AB -Virtio-mem/AB -VLAN/AB -VM/AB -VMCache/AB -vmm -VMM/AB -VMX/AB -VPP/AB -VSOCK/AB -VSS/AB -WIP/AB # Work In Progress -WRT/AB # With Respect To -XIP/AB -YAML/AB -irq/AB -mmio/AB -APIC -msg/AB -UDS -dbs # Dragonball Sandbox -TDX -tdx -mptable -fdt -gic -msr -cpuid -pio -gpu diff --git a/tests/cmd/check-spelling/data/arches.txt b/tests/cmd/check-spelling/data/arches.txt deleted file mode 100644 index 08fa55d850..0000000000 --- a/tests/cmd/check-spelling/data/arches.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: List of architectures. - -# Architectures - -aarch64/B -amd64/B -arm64/B -ppc64el/B -ppc64le/B -s390x/B -x86_64/B -x86/B - -# Micro architecture names - -Haswell/B -Ivybridge/B diff --git a/tests/cmd/check-spelling/data/distros.txt b/tests/cmd/check-spelling/data/distros.txt deleted file mode 100644 index 1edca51f8a..0000000000 --- a/tests/cmd/check-spelling/data/distros.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: List of Linux Distributions. - -CentOS/B -Debian/B -EulerOS/B -Fedora/B -macOS/B -MacOS/B -minikube/B -openSUSE/B -OpenSUSE/B -RHEL/B -SLES/B -Ubuntu/B diff --git a/tests/cmd/check-spelling/data/files.txt b/tests/cmd/check-spelling/data/files.txt deleted file mode 100644 index 5fa4fc1168..0000000000 --- a/tests/cmd/check-spelling/data/files.txt +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: Names of commands, files and packages. 
-# -# Notes: These *should* strictly be placed in backticks but alas this -# doesn't always happen. -# -# References: https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#files-and-command-names - -cgroup/AB -coredump/A -cpuset/AB -Dockerfile/AB -init/AB -initramfs/AB -initrd/AB -netns/AB -rootfs/AB -stderr/AB -stdin/AB -stdout/AB -syslog/AB -Vagrantfile/B diff --git a/tests/cmd/check-spelling/data/hunspell.txt b/tests/cmd/check-spelling/data/hunspell.txt deleted file mode 100644 index feae4b539a..0000000000 --- a/tests/cmd/check-spelling/data/hunspell.txt +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: List of words that are missing from Hunspell dictionaries -# on some platforms. - -committer/AB # Not available on Ubuntu 16.04 or CentOS 7 -plugin/AB # Not available on Ubuntu 16.04 -regexp/AB # Not available on Ubuntu 16.04 -screenshot/AB # Not available on Ubuntu 16.04 or CentOS 7 -tarball/AB # Not available on Ubuntu 16.04 -uninstall # Not available on Ubuntu 16.04 diff --git a/tests/cmd/check-spelling/data/main.txt b/tests/cmd/check-spelling/data/main.txt deleted file mode 100644 index e38748fd8b..0000000000 --- a/tests/cmd/check-spelling/data/main.txt +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: General word list. 
- -Acknowledgement -ack/A -arg # Argument -auditability -backend -backport/ACD -backtick/AB -backtrace -bootloader/AB -centric/B -checkbox/A -chipset/AB -chroot # Unix change root command -codebase -commandline -config/AB -crypto # Cryptography -cryptoprocessor/AB -DaemonSet/AB -deliverable/AB -dev -devmapper/B -devicemapper/B -deploy -dialer -dialog/A -Diffie/B # Diffie–Hellman (cryptography) -distro/AB -emptydir/A -enablement/AB -entrypoint/AB -ethernet -filename/AB -filesystem/AB -freeform -genpolicy/AB # Kata policy generation tool -goroutine/AB -hostname/AB -hostPath -hotplug/ACD -howto/AB -HugePage/AB -hugepage/AB -Hyp -hypercall/A -hypervisor/AB -implementer/A -implementor/A -Infiniband -iodepth/A -ioengine/A -iptables -Itanium/AB -kata -Kat/AB # "Kat Herding Team" :) -keypair/A -lifecycle/A -linter/AB -logfile/A -Longterm -longterm -loopback -memcpy/A -mergeable -metadata -microcontroller/AB -miniOS -mmap/AB -MonitorTest/A -nack/AB -namespace/ABCD -netlink -NVIDIA/A -nvidia/A -onwards -OpenAPI -OS/AB -parallelize/AC -passthrough -patchset/A -pluggable/AB -portmapper/AB -portmapping/A -pre -prefetch/ACD -prestart -programmatically -proxying -Quadro -ramdisk/A -readonly -rebase/ACD -refactor/ACD -regs -remediate -repo/A -runtime/AB -scalability -serverless -signoff/A -snapshotter/AB -stalebot/B -startup -Submodule/A -submodule/A -subdirectory/A -swappiness -sysctl/AB -teardown -templating -timestamp/AB -tracability -ttRPC/B -udev/B -uevent/AB -unbootable -uncomment/ACD -unported -unskip/AC -untrusted -untrusting -userid/AB -userspace/B -vectorAdd # CUDA sample name -vendored -vendoring -versioning -vGPU -virtualization -virtualized -webhook/AB -whitespace -workflow/A -Xeon/A -yaml -upcall -Upcall -ioctl/A -struct/A # struct in Rust -Struct/A -intel diff --git a/tests/cmd/check-spelling/data/projects.txt b/tests/cmd/check-spelling/data/projects.txt deleted file mode 100644 index b48767cac7..0000000000 --- a/tests/cmd/check-spelling/data/projects.txt +++ 
/dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2019-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: Names of projects, companies and services. - -Ansible/B -AppArmor/B -bisecter/B -blogbench/B -BusyBox/B -Cassandra/B -ccloudvm/B -codecov/B -containerd/B -cnn/B -cri-o/B -CRI-O/B -DevStack/B -Django/B -Docker/B -dracut/B -Dragonball/B -Facebook/B -fio/B -Fluentd/B -Frakti/B -Git/B -GitHub/B -GoDoc/B -golang/B -Golang/B -Grafana/B -Gramine/B -Huawei/B -Inclavare/B -iPerf/B -IPerf/B -Istio/B -Jaeger/B -Jenkins/B -Jupyter/B -journald/B -jq/B -Kata/B -Kibana/B -Kubelet/B -Kubernetes/B -kubernetes/B # when used in links or paths -Launchpad/B -LevelDB/B -libcontainer/B -libelf/B -libvirt/B -Linkerd/B -LinuxONE/B -Logrus/B -Logstash/B -Mellanox/B -Minikube/B -MITRE/B -musl/B -Netlify/B -nydus/B # Nydus image, container image format (e.g. guest-pull) -Nginx/B -OpenCensus/B -OpenPGP/B -openshift/B # lower-case used for some sub-projects -OpenShift/B -OpenSSL/B -OpenStack/B -OpenTelemetry/B -OpenTracing/B -osbuilder/B -packagecloud/B -Pandoc/B -Podman/B -PullApprove/B -Pytorch/B -QuickAssist/B -R/B -raytracer/B -rkt/B/B -runc/B -runV/B -rustlang/B -Rustlang/B -SELinux/B -SemaphoreCI/B -snapcraft/B -snapd/B -snphost/B -SQLite/B -StratoVirt/B -SUSE/B -Sysbench/B -systemd/B -tf/B -TravisCI/B -Tokio/B -Vexxhost/B -virtcontainers/B -VMWare/B -vSphere/B -Yamux/B -yq/B -Zun/B diff --git a/tests/cmd/check-spelling/data/rules.aff b/tests/cmd/check-spelling/data/rules.aff deleted file mode 100644 index 7f37dbf477..0000000000 --- a/tests/cmd/check-spelling/data/rules.aff +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -SET UTF-8 - -# Add the following characters so they are accepted as part of a word -WORDCHARS 0123456789' - -# Disable hyphenation -BREAK 0 - -# plural -SFX A N 3 -SFX A 0 s [^x] -SFX A 0 es x -SFX A y ies - -# possession -SFX B N 1 -SFX B 0 's - -# past tense -SFX C 
N 4 -SFX C 0 d e -SFX C 0 ed [rt] -SFX C 0 ped p -SFX C 0 ged g - -# present continuous -SFX D N 3 -SFX D 0 ging g -SFX D 0 ing [rt] -SFX D e ing e diff --git a/tests/cmd/check-spelling/kata-dictionary.aff b/tests/cmd/check-spelling/kata-dictionary.aff deleted file mode 100644 index 7f37dbf477..0000000000 --- a/tests/cmd/check-spelling/kata-dictionary.aff +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -SET UTF-8 - -# Add the following characters so they are accepted as part of a word -WORDCHARS 0123456789' - -# Disable hyphenation -BREAK 0 - -# plural -SFX A N 3 -SFX A 0 s [^x] -SFX A 0 es x -SFX A y ies - -# possession -SFX B N 1 -SFX B 0 's - -# past tense -SFX C N 4 -SFX C 0 d e -SFX C 0 ed [rt] -SFX C 0 ped p -SFX C 0 ged g - -# present continuous -SFX D N 3 -SFX D 0 ging g -SFX D 0 ing [rt] -SFX D e ing e diff --git a/tests/cmd/check-spelling/kata-dictionary.dic b/tests/cmd/check-spelling/kata-dictionary.dic deleted file mode 100644 index 577c7c9ba3..0000000000 --- a/tests/cmd/check-spelling/kata-dictionary.dic +++ /dev/null @@ -1,419 +0,0 @@ -418 -ACPI/AB -ACS/AB -API/AB -APIC -AUFS -AWS/AB -Acknowledgement -Ansible/B -AppArmor/B -BDF/AB -BusyBox/B -CFS/AB -CLI/AB -CNI/AB -CNM/AB -CPUID/AB -CRI-O/B -CRI/AB -CVE/AB -Cassandra/B -CentOS/B -DAX/AB -DMA/AB -DPDK/AB -DaemonSet/AB -Debian/B -DevStack/B -Diffie/B -DinD/B -Django/B -Docker/B -Dockerfile/AB -Dragonball/B -EulerOS/B -FS/AB -FaaS/B -Facebook/B -Fedora/B -Fluentd/B -Frakti/B -GCE/AB -GOPATH/AB -GPG/AB -GPU/AB -GSC/AB -GVT/AB -Git/B -GitHub/B -GoDoc/B -Golang/B -Grafana/B -Gramine/B -Haswell/B -Huawei/B -HugePage/AB -Hyp -IOMMU/AB -IOV/AB -IPerf/B -IaaS/B -Inclavare/B -Infiniband -IoT/AB -Istio/B -Itanium/AB -Ivybridge/B -JSON/AB -Jaeger/B -Jenkins/B -Jupyter/B -KCSA/AB -KSM/AB -KVM/AB -Kat/AB -Kata/B -Kibana/B -Kubelet/B -Kubernetes/B -LTS/AB -Launchpad/B -LevelDB/B -Linkerd/B -LinuxONE/B -Logrus/B -Logstash/B -Longterm -MACVTAP/AB 
-MDEV/AB -MITRE/B -MacOS/B -Mellanox/B -Minikube/B -MonitorTest/A -NEMU/AB -NFD/AB -NIC/AB -NVDIMM/AB -NVIDIA/A -Netlify/B -Nginx/B -OCI/AB -OS/AB -OVMF/AB -OpenAPI -OpenCensus/B -OpenPGP/B -OpenSSL/B -OpenSUSE/B -OpenShift/B -OpenStack/B -OpenTelemetry/B -OpenTracing/B -OverlayFS/B -PCDIMM/AB -PCI/AB -PCIe/AB -PID/AB -PNG/AB -POD/AB -PR/AB -PSS/AB -PaaS/B -Pandoc/B -Podman/B -PullApprove/B -Pytorch/B -QA/AB -QAT/AB -QEMU/AB -Quadro -QuickAssist/B -R/B -RBAC/AB -RDMA/AB -RHEL/B -RNG/AB -RuntimeClass/B -RuntimeClasses/B -Rustlang/B -SCSI/AB -SDK/AB -SEL/AB -SELinux/B -SEV-SNP/B -SHA/AB -SLES/B -SPDX/AB -SQLite/B -SRIOV/AB -SUSE/B -SVG/AB -SaaS/B -SemaphoreCI/B -StratoVirt/B -Struct/A -Submodule/A -Sysbench/B -TBD/AB -TDX -TEE/AB -TOC/AB -TOML/AB -TTY/AB -Tokio/B -TravisCI/B -UDS -UI/AB -UTS/AB -UUID/AB -Ubuntu/B -Upcall -VETH/AB -VF/AB -VFIO/AB -VGPU/AB -VHOST/AB -VLAN/AB -VM/AB -VMCache/AB -VMM/AB -VMWare/B -VMX/AB -VPP/AB -VSOCK/AB -VSS/AB -Vagrantfile/B -Vexxhost/B -VirtIO/AB -Virtio-fs/AB -Virtio-mem/AB -WIP/AB -WRT/AB -XIP/AB -Xeon/A -YAML/AB -Yamux/B -Zun/B -aarch64/B -ack/A -acpi -amd64/B -api/AB -arg -arm64/B -auditability -backend -backport/ACD -backtick/AB -backtrace -bisecter/B -blogbench/B -bootloader/AB -ccloudvm/B -centric/B -cgroup/AB -checkbox/A -chipset/AB -chroot -ci/AB -cnn/B -codebase -codecov/B -commandline -committer/AB -config/AB -containerd/B -coredump/A -cpuid -cpuset/AB -cri-o/B -crypto -cryptoprocessor/AB -dbs -deliverable/AB -deploy -dev -devicemapper/B -devmapper/B -dialer -dialog/A -dind/B -distro/AB -dracut/B -emptydir/A -enablement/AB -entrypoint/AB -enum/A -ethernet -fdt -filename/AB -filesystem/AB -fio/B -freeform -fs/B -gRPC/AB -genpolicy/AB -gic -golang/B -goroutine/AB -gpu -hostPath -hostname/AB -hotplug/ACD -howto/AB -hugepage/AB -hypercall/A -hypervisor/AB -iPerf/B -implementer/A -implementor/A -init/AB -initramfs/AB -initrd/AB -intel -io/B -ioctl/A -iodepth/A -ioengine/A -iptables -irq/AB -journald/B -jq/B -k8s/B -kata 
-keypair/A -kubernetes/B -libcontainer/B -libelf/B -libvirt/B -lifecycle/A -linter/AB -logfile/A -longterm -loopback -macOS/B -mem/B -memcpy/A -memdisk/B -mergeable -metadata -microcontroller/AB -miniOS -minikube/B -mmap/AB -mmio/AB -mptable -msg/AB -msr -musl/B -nack/AB -namespace/ABCD -netlink -netns/AB -nodeSelector/B -nodeSelectors/B -nv/AB -nvidia/A -nydus/B -onwards -openSUSE/B -openshift/B -osbuilder/B -packagecloud/B -parallelize/AC -passthrough -patchset/A -pio -pluggable/AB -plugin/AB -pmem/B -portmapper/AB -portmapping/A -ppc64el/B -ppc64le/B -pre -prefetch/ACD -prestart -programmatically -proxying -ramdisk/A -raytracer/B -readonly -rebase/ACD -refactor/ACD -regexp/AB -regs -remediate -repo/A -rkt/B/B -rootfs/AB -runV/B -runc/B -runtime/AB -rustlang/B -s390x/B -scalability -screenshot/AB -seccomp -serverless -signoff/A -snapcraft/B -snapd/B -snapshotter/AB -snphost/B -stalebot/B -startup -stderr/AB -stdin/AB -stdout/AB -struct/A -subdirectory/A -submodule/A -swappiness -sysctl/AB -syslog/AB -systemd/B -tarball/AB -tdx -teardown -templating -tf/B -timestamp/AB -tracability -ttRPC/B -udev/B -uevent/AB -unbootable -uncomment/ACD -uninstall -unported -unskip/AC -untrusted -untrusting -upcall -userid/AB -userspace/B -util/A -vCPU/AB -vGPU -vSphere/B -vectorAdd -vendored -vendoring -versioning -vfio/AB -vhost/AB -virtcontainers/B -virtio/AB -virtualization -virtualized -vmm -webhook/AB -whitespace -workflow/A -x86/B -x86_64/B -yaml -yq/B diff --git a/tests/cmd/check-spelling/kata-spell-check.sh b/tests/cmd/check-spelling/kata-spell-check.sh deleted file mode 100755 index 576100f96a..0000000000 --- a/tests/cmd/check-spelling/kata-spell-check.sh +++ /dev/null @@ -1,336 +0,0 @@ -#!/bin/bash -# Copyright (c) 2019-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -# Description: spell-check utility. 
- -[ -n "$DEBUG" ] && set -x - -set -o errexit -set -o pipefail -set -o nounset - -# Ensure we spell check in English -LANG=C -LC_ALL=C - -script_name=${0##*/} - -if [ "$(uname -s)" == "Darwin" ] -then - # Hunspell dictionaries are a not easily available - # on this platform it seems. - echo "INFO: $script_name: OSX not supported - exiting" - exit 0 -fi - -self_dir=$(dirname "$(readlink -f "$0")") -cidir="${self_dir}/../../../tests" - -source "${cidir}/common.bash" - -# Directory containing word lists. -# -# Each file in this directory must: -# -# - Have the ".txt" extension. -# - Contain one word per line. -# -# Additionally, the files may contain blank lines and comments -# (lines beginning with '#'). -KATA_DICT_FRAGMENT_DIR=${KATA_DICT_FRAGMENT_DIR:-data} - -KATA_DICT_NAME="${KATA_DICT_NAME:-kata-dictionary}" - -# Name of dictionary file suitable for using with hunspell(1) -# as a personal dictionary. -KATA_DICT_FILE="${KATA_DICT_FILE:-${KATA_DICT_NAME}.dic}" - -KATA_RULES_FILE="${KATA_RULES_FILE:-${KATA_DICT_FILE/.dic/.aff}}" - -# command to remove code from markdown (inline and blocks) -strip_cmd="${cidir}/kata-doc-to-script.sh" - -fragment_dir="${self_dir}/${KATA_DICT_FRAGMENT_DIR}" - -# Name of file containing dictionary rules that apply to the -# KATA_DICT_FILE word list. -rules_file_name="rules.aff" - -# Command to spell check a file -spell_check_cmd="${KATA_SPELL_CHECK_CMD:-hunspell}" - -# Command to convert a markdown file into plain text -md_convert_tool="${KATA_MARKDOWN_CONVERT_TOOL:-pandoc}" - -KATA_DICT_DIR="${KATA_DICT_DIR:-${self_dir}}" -dict_file="${KATA_DICT_DIR}/${KATA_DICT_FILE}" -rules_file="${KATA_DICT_DIR}/${KATA_RULES_FILE}" - -# Hunspell refers to custom dictionary by their path followed by the name of -# the dictionary (without the file extension). -kata_dict_ref="${KATA_DICT_DIR}/${KATA_DICT_NAME}" - -# All project documentation must be written in English, -# with American English taking priority. 
-# -# We also use a custom dictionary which has to be specified by its -# "directory and name prefix" and which must also be the first specified -# dictionary. -dict_languages="${kata_dict_ref},en_US,en_GB" - -make_dictionary() -{ - [ -d "$fragment_dir" ] || die "invalid fragment directory" - [ -z "$dict_file" ] && die "missing dictionary output file name" - - # Note: the first field is extracted to allow for inline - # comments in each fragment. For example: - # - # word # this text describes why the word is in the dictionary. - # - local dict - - dict=$(cat "$fragment_dir"/*.txt |\ - grep -v '^#' |\ - grep -v '^$' |\ - awk '{print $1}' |\ - sort -u || true) - - [ -z "$dict" ] && die "generated dictionary is empty" - - # Now, add in the number of words as a header (required by Hunspell) - local count - - count=$(echo "$dict"| wc -l | awk '{print $1}' || true) - [ -z "$count" ] && die "cannot determine dictionary length" - [ "$count" -eq 0 ] && die "invalid dictionary length" - - # Construct the dictionary - (echo "$count"; echo "$dict") > "$dict_file" - - cp "${fragment_dir}/${rules_file_name}" "${rules_file}" -} - -spell_check_file() -{ - local file="$1" - - [ -z "$file" ] && die "need file to check" - [ -e "$file" ] || die "file does not exist: '$file'" - - [ -e "$dict_file" ] || make_dictionary - - info "Spell checking file '$file'" - - # Determine the pandoc input format. - local pandoc_input_fmts - local pandoc_input_fmt - - local pandoc_input_fmts=$(pandoc --list-input-formats 2>/dev/null || true) - - if [ -z "$pandoc_input_fmts" ] - then - # We're using a very old version of pandoc that doesn't - # support listing its available input formats, so - # specify a default. 
- pandoc_input_fmt="markdown_github" - else - # Pandoc has multiple names for the gfm parser so find one of them - pandoc_input_fmt=$(echo "$pandoc_input_fmts" |\ - grep -E "gfm|github" |\ - head -1 || true) - fi - - [ -z "$pandoc_input_fmt" ] && die "cannot find usable pandoc input format" - - local stripped_doc - - local pandoc_doc - local utf8_free_doc - local pre_hunspell_doc - local hunspell_results - local final_results - - # First strip out all code blocks and convert all - # "quoted apostrophe's" ('\'') back into a single apostrophe. - stripped_doc=$("$strip_cmd" -i "$file" -) - - # Next, convert the remainder it into plain text to remove the - # remaining markdown syntax. - # - # Before pandoc gets hold of it: - # - # - Replace pipes with spaces. This - # fixes an issue with old versions of pandoc (Ubuntu 16.04) - # which completely mangle tables into nonsense. - # - # - Remove empty reference links. - # - # For example, this markdown - # - # blah [`qemu-lite`][qemu-lite] blah. - # : - # [qemu-lite]: https://... - # - # Gets converted into - # - # blah [][qemu-lite] blah. - # : - # [qemu-lite]: https://... - # - # And the empty set of square brackets confuses pandoc. - # - # After pandoc has processed the data, remove any remaining - # "inline links" in this format: - # - # [link name](#link-address) - # - # This is strictly only required for old versions of pandoc. - - pandoc_doc=$(echo "$stripped_doc" |\ - tr '|' ' ' |\ - sed 's/\[\]\[[^]]*\]//g' |\ - "$md_convert_tool" -f "${pandoc_input_fmt}" -t plain - |\ - sed 's/\[[^]]*\]([^\)]*)//g' || true) - - # Convert the file into "pure ASCII" by removing all awkward - # Unicode characters that won't spell check. - # - # Necessary since pandoc is "clever" and will convert things like - # GitHub's colon emojis (such as ":smile:") into the actual utf8 - # character where possible. 
- utf8_free_doc=$(echo "$pandoc_doc" | iconv -c -f utf-8 -t ascii) - - # Next, perform the following simplifications: - # - # - Remove URLs. - # - Remove email addresses. - # - Replace most punctuation symbols with a space - # (excluding a dash (aka hyphen!) - # - Carefully remove non-hyphen dashes. - # - Remove GitHub @userids. - pre_hunspell_doc=$(echo "$utf8_free_doc" |\ - sed 's,https*://[^[:space:]()][^[:space:]()]*,,g' |\ - sed -r 's/[a-zA-Z0-9.-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+//g' |\ - tr '[,\[\]()\*\\/\|=]' ' ' |\ - sed -e 's/^ *-//g' -e 's/- $//g' -e 's/ -//g' |\ - sed 's/@[a-zA-Z0-9][a-zA-Z0-9]*\b//g') - - # Call the spell checker - hunspell_results=$(echo "$pre_hunspell_doc" | $spell_check_cmd -d "${dict_languages}") - - # Finally, post-process the hunspell output: - # - # - Parse the output to ignore: - # - Hunspell banner. - # - Correctly spelt words (lines starting with '*', '+' or '-'). - # - All words containing numbers (like "100MB"). - # - All words that appear to be acronymns / Abbreviations - # (atleast two upper-case letters and which may be plural or - # possessive). - # - All words that appear to be numbers. - # - All possessives and the dreaded isolated "'s" which occurs - # for input like this: - # - # `kata-shim`'s - # - # which gets converted by $strip_cmd into simply: - # - # 's - # - # - Sort output. - - final_results=$(echo "$hunspell_results" |\ - grep -Evi "(ispell|hunspell)" |\ - grep -Ev '^(\*|\+|-)' |\ - grep -Evi "^(&|#) [^ ]*[0-9][^ ]*" |\ - grep -Ev "^. [A-Z][A-Z][A-Z]*(s|'s)*" |\ - grep -Ev "^. 
's" |\ - sort -u || true) - - local line - local incorrects - local near_misses - - near_misses=$(echo "$final_results" | grep '^&' || true) - incorrects=$(echo "$final_results" | grep '^#' | awk '{print $2}' || true) - - local -i failed=0 - - [ -n "$near_misses" ] && failed+=1 - [ -n "$incorrects" ] && failed+=1 - - echo "$near_misses" | while read -r line - do - [ "$line" = "" ] && continue - - local word - local possibles - - word=$(echo "$line" | awk '{print $2}') - possibles=$(echo "$line" | cut -d: -f2- | sed 's/^ *//g') - - warn "Word '${word}': did you mean one of the following?: ${possibles}" - done - - local incorrect - for incorrect in $incorrects - do - warn "Incorrect word: '$incorrect'" - done - - [ "$failed" -gt 0 ] && die "Spell check failed for file: '$file'" - - info "Spell check successful for file: '$file'" -} - -delete_dictionary() -{ - rm -f "${KATA_DICT_FILE}" "${KATA_RULES_FILE}" -} - -setup() -{ - local cmd - - for cmd in "$spell_check_cmd" "$md_convert_tool" - do - command -v "$cmd" &>/dev/null || die "Need $cmd command" - done -} - -usage() -{ - cat <<-EOF - Usage: ${script_name} [arguments] - - Description: Spell-checking utility. - - Commands: - - check : Spell check the specified file - (implies 'make-dict'). - delete-dict : Delete the dictionary. - help : Show this usage. - make-dict : Create the dictionary. 
-EOF -} - -main() -{ - setup - - [ -z "${1:-}" ] && usage && echo && die "need command" - - case "$1" in - check) shift && spell_check_file "$1" ;; - delete-dict) delete_dictionary ;; - help|-h|--help) usage && exit 0 ;; - make-dict) make_dictionary ;; - *) die "invalid command: '$1'" ;; - esac -} - -main "$@" diff --git a/tests/static-checks.sh b/tests/static-checks.sh index 1319f1fe3f..bb606210d4 100755 --- a/tests/static-checks.sh +++ b/tests/static-checks.sh @@ -792,24 +792,6 @@ static_check_docs() # Synchronisation point wait - - # Now, spell check the docs - cmd="${test_dir}/cmd/check-spelling/kata-spell-check.sh" - - local docs_failed=0 - for doc in $docs - do - "$cmd" check "$doc" || { info "spell check failed for document $doc" && docs_failed=1; } - - static_check_eof "$doc" - done - - popd - - [ $docs_failed -eq 0 ] || { - url='https://github.com/kata-containers/kata-containers/blob/main/docs/Documentation-Requirements.md#spelling' - die "spell check failed, See $url for more information." - } } static_check_eof()