default.nix (1391B)
# Package expression for wiktextract, built as a Python application.
#
# The result is a function of the arguments below; every Python-level input is
# taken from `python3Packages`, except `wikitextprocessor`, which is passed in
# as its own argument.
{
  lib,
  stdenv, # not referenced anywhere in this expression
  python3Packages,
  fetchFromGitHub,
  fetchurl,
  unzip,
  wikitextprocessor,
}:

let
  # Pull the Python package set members used below into scope by name, so the
  # lists further down show plainly which entries come from python3Packages
  # and which are function arguments (unzip, wikitextprocessor).
  inherit (python3Packages)
    buildPythonApplication
    levenshtein
    nltk
    pytestCheckHook
    setuptools
    ;

  # Brown corpus archive from the nltk_data repository; it is only used by
  # preCheck, where it is unpacked into a local NLTK data directory.
  brownCorpus = fetchurl {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip";
    hash = "sha256-mydfmzuV171mzPt80ln0RaE7vl0fQQeroJ/T6DZLr6Y=";
  };
in
buildPythonApplication {
  pname = "wiktextract";
  version = "1.99.7";
  pyproject = true;

  # Source is pinned to a specific commit of the upstream repository.
  src = fetchFromGitHub {
    owner = "tatuylonen";
    repo = "wiktextract";
    rev = "3a3e5746305cf648a0386e089615aa533f68b66d";
    hash = "sha256-iL3mFxX32OaD8UdPdvMyc/ksmeCH4iykM37DgHd+KwE=";
  };

  # Rewrite the requirement name in setup.py from "python-Levenshtein" to
  # "Levenshtein" before building.
  postPatch = ''
    substituteInPlace setup.py \
      --replace-fail python-Levenshtein Levenshtein
  '';

  build-system = [ setuptools ];

  dependencies = [
    levenshtein
    setuptools
    wikitextprocessor # function argument, not looked up in python3Packages
    nltk
  ];

  nativeCheckInputs = [
    pytestCheckHook
    unzip # function argument; used by preCheck to unpack the corpus
  ];

  # Give the tests a local NLTK data directory containing the Brown corpus.
  # Layout follows https://www.nltk.org/data.html#manual-installation
  preCheck = ''
    export NLTK_DATA=$PWD/nltk_data
    mkdir -p nltk_data/corpora
    unzip ${brownCorpus} -d nltk_data/corpora
  '';

  meta = {
    description = "Wiktionary dump file parser and multilingual data extractor";
    homepage = "https://github.com/tatuylonen/wiktextract";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.sikmir ];
  };
}