default.nix (1315B)
# Package expression for wiktextract, built with
# python3Packages.buildPythonApplication.
#
# Arguments (supplied by the caller):
#   lib                - used for meta.license and meta.maintainers
#   stdenv             - not referenced in this expression; kept so the
#                        argument set stays compatible with existing callers
#   python3Packages    - provides the builder and the Python-level inputs
#   fetchFromGitHub    - fetches the wiktextract sources
#   fetchurl           - fetches the brown.zip archive used by the tests
#   unzip              - unpacks brown.zip in preCheck
#   wikitextprocessor  - runtime dependency, taken from this argument
{
  lib,
  stdenv,
  python3Packages,
  fetchFromGitHub,
  fetchurl,
  unzip,
  wikitextprocessor,
}:

let
  # brown.zip from the nltk_data repository; it is only unpacked in preCheck.
  brown = fetchurl {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip";
    hash = "sha256-mydfmzuV171mzPt80ln0RaE7vl0fQQeroJ/T6DZLr6Y=";
  };
in
python3Packages.buildPythonApplication {
  pname = "wiktextract";
  version = "1.99.7";

  # State the build format explicitly instead of relying on the builder's
  # implicit default. "setuptools" matches the setup.py that postPatch edits.
  # NOTE(review): newer nixpkgs revisions appear to reject an unset format;
  # consider migrating to `pyproject = true` with
  # `build-system = [ python3Packages.setuptools ]` once that build is verified.
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "tatuylonen";
    repo = "wiktextract";
    rev = "3a3e5746305cf648a0386e089615aa533f68b66d";
    hash = "sha256-iL3mFxX32OaD8UdPdvMyc/ksmeCH4iykM37DgHd+KwE=";
  };

  # Rename the python-Levenshtein requirement in setup.py to Levenshtein.
  # --replace-fail aborts the build if the pattern is no longer present.
  postPatch = ''
    substituteInPlace setup.py \
      --replace-fail python-Levenshtein Levenshtein
  '';

  # Attribute paths are spelled out so the origin of each input is visible:
  # wikitextprocessor is the function argument above, the rest come from
  # python3Packages.
  dependencies = [
    python3Packages.levenshtein
    python3Packages.setuptools
    wikitextprocessor
    python3Packages.nltk
  ];

  # unzip is the function argument above; it is needed by preCheck.
  nativeCheckInputs = [
    python3Packages.pytestCheckHook
    unzip
  ];

  # Unpack brown.zip into a local nltk_data directory and point NLTK_DATA
  # at it before the tests run.
  # https://www.nltk.org/data.html#manual-installation
  preCheck = ''
    export NLTK_DATA=$PWD/nltk_data
    mkdir -p nltk_data/corpora
    unzip ${brown} -d nltk_data/corpora
  '';

  meta = {
    description = "Wiktionary dump file parser and multilingual data extractor";
    homepage = "https://github.com/tatuylonen/wiktextract";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.sikmir ];
  };
}