{
  "_id": "6a215569cd65a98ecbd2e17e",
  "Package": "cleanNLP",
  "Type": "Package",
  "Title": "A Tidy Data Model for Natural Language Processing",
  "Version": "3.1.0",
  "Author": "Taylor B. Arnold [aut, cre]",
  "Maintainer": "Taylor B. Arnold <tarnold2@richmond.edu>",
  "Description": "Provides a set of fast tools for converting a textual\ncorpus into a set of normalized tables. Users may make use of\nthe 'udpipe' back end with no external dependencies, or a\nPython back end with 'spaCy' <https://spacy.io>. Exposed\nannotation tasks include tokenization, part of speech tagging,\nnamed entity recognition, and dependency parsing.",
  "SystemRequirements": "Python (>= 3.7.0)",
  "License": "LGPL-2",
  "URL": "https://statsmaths.github.io/cleanNLP/",
  "BugReports": "https://github.com/statsmaths/cleanNLP/issues",
  "LazyData": "true",
  "Encoding": "UTF-8",
  "VignetteBuilder": "knitr",
  "RoxygenNote": "7.3.1",
  "Config/pak/sysreqs": "libicu-dev libpng-dev python3",
  "Repository": "https://taylor-arnold.r-universe.dev",
  "Date/Publication": "2025-06-08 14:20:56 UTC",
  "RemoteUrl": "https://github.com/taylor-arnold/rpkg",
  "RemoteRef": "HEAD",
  "RemoteSha": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
  "RemoteSubdir": "cleanNLP",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-06-04 10:34:43 UTC",
    "User": "root"
  },
  "MD5sum": "f837ad2be524ed039f66fb87bf8fc5b8",
  "_user": "taylor-arnold",
  "_type": "src",
  "_file": "cleanNLP_3.1.0.tar.gz",
  "_fileid": "049bc8756ac6880f10fc2d5d0f3623b92ae306b65483a21979deac483b3e041b",
  "_filesize": 4829076,
  "_sha256": "049bc8756ac6880f10fc2d5d0f3623b92ae306b65483a21979deac483b3e041b",
  "_created": "2026-06-04T10:34:43.000Z",
  "_published": "2026-06-04T10:37:29.331Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79500597868,
      "time": 132,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7409273069"
    },
    {
      "job": 79500597879,
      "time": 135,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7409273839"
    },
    {
      "job": 79500597864,
      "time": 121,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7409268674"
    },
    {
      "job": 79500597852,
      "time": 98,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7409261221"
    },
    {
      "job": 79500107928,
      "time": 189,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7409229686"
    },
    {
      "job": 79500597875,
      "time": 107,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7409264620"
    },
    {
      "job": 79500597907,
      "time": 72,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7409252962"
    },
    {
      "job": 79500597888,
      "time": 87,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7409258089"
    },
    {
      "job": 79500597890,
      "time": 76,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7409254287"
    }
  ],
  "_buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/taylor-arnold/rpkg",
  "_commit": {
    "id": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
    "author": "taylor-arnold <taylor@dvlab.org>",
    "committer": "taylor-arnold <taylor@dvlab.org>",
    "message": "auto update\n",
    "time": 1749392456
  },
  "_maintainer": {
    "name": "Taylor B. Arnold",
    "email": "tarnold2@richmond.edu",
    "login": "taylor-arnold",
    "description": "Professor, Data Science & Statistics Faculty in Linguistics and Cognitive Sci. Director, @distant-viewing ",
    "uuid": 5752184
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "Matrix",
      "version": ">= 1.2",
      "role": "Imports"
    },
    {
      "package": "udpipe",
      "role": "Imports"
    },
    {
      "package": "reticulate",
      "role": "Imports"
    },
    {
      "package": "stringi",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "methods",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "version": ">= 1.15",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "version": ">= 1.4",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 1.0.1",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "version": ">= 2.2.2",
      "role": "Suggests"
    }
  ],
  "_owner": "taylor-arnold",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-23",
      "n": 6
    }
  ],
  "_tags": [],
  "_topics": [
    "algorithms",
    "spatial-analysis",
    "text-analysis"
  ],
  "_stars": 218,
  "_contributors": [
    {
      "user": "taylor-arnold",
      "count": 6,
      "uuid": 5752184
    },
    {
      "user": "dselivanov",
      "count": 1,
      "uuid": 5123805
    },
    {
      "user": "emilhvitfeldt",
      "count": 1,
      "uuid": 14034784
    },
    {
      "user": "joshuaulrich",
      "count": 1,
      "uuid": 1023193
    },
    {
      "user": "batpigandme",
      "count": 1,
      "uuid": 831732
    },
    {
      "user": "reisner",
      "count": 1,
      "uuid": 490216
    }
  ],
  "_userbio": {
    "uuid": 5752184,
    "type": "user",
    "name": "Taylor Arnold",
    "description": "Professor, Data Science & Statistics Faculty in Linguistics and Cognitive Sci. Director, @distant-viewing "
  },
  "_downloads": {
    "count": 558,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/cleanNLP"
  },
  "_devurl": "https://github.com/statsmaths/cleannlp",
  "_searchresults": 267,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/cleanNLP.html",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/taylor-arnold/rpkg",
  "_realowner": "taylor-arnold",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.24",
      "date": "2016-11-11"
    },
    {
      "version": "1.5.2",
      "date": "2017-04-12"
    },
    {
      "version": "1.9.0",
      "date": "2017-05-27"
    },
    {
      "version": "1.10.0",
      "date": "2017-07-01"
    },
    {
      "version": "2.0.3",
      "date": "2018-01-22"
    },
    {
      "version": "2.3.0",
      "date": "2018-11-18"
    },
    {
      "version": "3.0.0",
      "date": "2019-10-22"
    },
    {
      "version": "3.0.2",
      "date": "2020-03-08"
    },
    {
      "version": "3.0.3",
      "date": "2020-10-13"
    },
    {
      "version": "3.0.4",
      "date": "2022-08-15"
    },
    {
      "version": "3.0.7",
      "date": "2023-11-16"
    },
    {
      "version": "3.1.0",
      "date": "2024-05-20"
    }
  ],
  "_exports": [
    "cnlp_annotate",
    "cnlp_download_spacy",
    "cnlp_init_spacy",
    "cnlp_init_stringi",
    "cnlp_init_udpipe",
    "cnlp_utils_pca",
    "cnlp_utils_tf",
    "cnlp_utils_tfidf"
  ],
  "_datasets": [
    {
      "name": "un",
      "title": "Universal Declaration of Human Rights",
      "object": "un",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "doc_id",
        "text"
      ],
      "rows": 30,
      "table": true,
      "tojson": true
    },
    {
      "name": "word_frequency",
      "title": "Most frequent English words",
      "object": "word_frequency",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "language",
        "word",
        "frequency"
      ],
      "rows": 150000,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "cleanNLP-package",
      "title": "cleanNLP: A Tidy Data Model for Natural Language Processing",
      "topics": [
        "cleanNLP-package",
        "cleanNLP"
      ]
    },
    {
      "page": "cnlp_annotate",
      "title": "Run the annotation pipeline on a set of documents",
      "topics": [
        "cnlp_annotate"
      ]
    },
    {
      "page": "cnlp_download_spacy",
      "title": "Download model files needed for spacy",
      "topics": [
        "cnlp_download_spacy"
      ]
    },
    {
      "page": "cnlp_init_spacy",
      "title": "Interface for initializing the spacy backend",
      "topics": [
        "cnlp_init_spacy"
      ]
    },
    {
      "page": "cnlp_init_stringi",
      "title": "Interface for initializing the standard R backend",
      "topics": [
        "cnlp_init_stringi"
      ]
    },
    {
      "page": "cnlp_init_udpipe",
      "title": "Interface for initializing the udpipe backend",
      "topics": [
        "cnlp_init_udpipe"
      ]
    },
    {
      "page": "cnlp_utils_pca",
      "title": "Compute Principal Components and store as a Data Frame",
      "topics": [
        "cnlp_utils_pca"
      ]
    },
    {
      "page": "cnlp_utils_tfidf",
      "title": "Construct the TF-IDF Matrix from Annotation or Data Frame",
      "topics": [
        "cnlp_utils_tf",
        "cnlp_utils_tfidf"
      ]
    },
    {
      "page": "un",
      "title": "Universal Declaration of Human Rights",
      "topics": [
        "un"
      ]
    },
    {
      "page": "word_frequency",
      "title": "Most frequent English words",
      "topics": [
        "word_frequency"
      ]
    }
  ],
  "_readme": "https://github.com/taylor-arnold/rpkg/raw/HEAD/cleanNLP/README.md",
  "_rundeps": [
    "data.table",
    "here",
    "jsonlite",
    "lattice",
    "Matrix",
    "png",
    "rappdirs",
    "Rcpp",
    "RcppTOML",
    "reticulate",
    "rlang",
    "rprojroot",
    "stringi",
    "udpipe",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "wikipedia.Rmd",
      "filename": "wikipedia.html",
      "title": "Creating Text Visualizations with Wikipedia Data",
      "author": "Taylor Arnold",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Grabbing the data",
        "Running the cleanNLP annotation",
        "Reconstructing the text"
      ],
      "created": "2025-06-08 13:15:30",
      "modified": "2025-06-08 13:15:30",
      "commits": 1
    },
    {
      "source": "state-of-union.Rmd",
      "filename": "state-of-union.html",
      "title": "Exploring the State of the Union Addresses: A Case Study with cleanNLP",
      "author": "Taylor Arnold",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Running the cleanNLP annotation",
        "Exploratory Analysis",
        "Models",
        "Principal Component Analysis (PCA)",
        "Topic Models (LDA)",
        "Predictive Models"
      ],
      "created": "2025-06-08 13:15:30",
      "modified": "2025-06-08 13:15:30",
      "commits": 1
    }
  ],
  "_score": 8.4639377593052,
  "_indexed": true,
  "_nocasepkg": "cleannlp",
  "_universes": [
    "taylor-arnold",
    "statsmaths"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "3.1.0",
      "date": "2026-06-04T10:36:58.000Z",
      "distro": "noble",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "4f6a7ce5176f7dbd077eb9679fd6090c98958ff9dc5004737740122b49c1468a",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "3.1.0",
      "date": "2026-06-04T10:37:00.000Z",
      "distro": "noble",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "d8b5effd7998a14bcfc2c18b6f9d80b1aca9611e4baccf0b20b3b24043aa6c9b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "3.1.0",
      "date": "2026-06-04T10:36:44.000Z",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "34afad477a68875a4b70a0d97858af2b15543905f540988efea39356b69a205b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "3.1.0",
      "date": "2026-06-04T10:36:26.000Z",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "ef77654b45c0d7f60afd08f22060984c8d143ee67c474504d06a84b175a11de3",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "3.1.0",
      "date": "2026-06-04T10:36:48.000Z",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "736cc7054037493d5408b24b5cc251c62850083208e4976e0202b50f6ff983dc",
      "status": "success",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "3.1.0",
      "date": "2026-06-04T10:35:55.000Z",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "8b85c0616f1058517e28f04237abe81918808b57e9bfa6163b80f4e578ffe53c",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "3.1.0",
      "date": "2026-06-04T10:36:08.000Z",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "6b7c15d8bde8c9eab1562b1605ca796c2c0a4797844f8ebe317fce01f8f267d8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "3.1.0",
      "date": "2026-06-04T10:35:58.000Z",
      "commit": "fef7c1b376e204f78d8b21e59fa70171e30e7e48",
      "fileid": "e3cd79183280af4c705389c510047cc37688c511a079e9629afa54351c8c00ba",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/taylor-arnold/actions/runs/26946312608"
    }
  ]
}