default.nix (1260B)
# Package expression for wiktextract, built as a Python application.
#
# Arguments:
#   lib               - used for `licenses` and `maintainers` in `meta`
#   stdenv            - accepted but not referenced in this expression
#   python3Packages   - provides the builder and most Python dependencies
#   fetchFromGitHub   - fetches the application source
#   fetchurl          - fetches the NLTK "brown" corpus archive used by the tests
#   unzip             - unpacks that archive during the check phase
#   wikitextprocessor - runtime dependency, passed in as its own argument
#                       (not looked up in `python3Packages`)
{
  lib,
  stdenv,
  python3Packages,
  fetchFromGitHub,
  fetchurl,
  unzip,
  wikitextprocessor,
}:

let
  # Zip archive of the NLTK "brown" corpus; unpacked into NLTK_DATA in preCheck.
  nltkBrownCorpus = fetchurl {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip";
    hash = "sha256-mydfmzuV171mzPt80ln0RaE7vl0fQQeroJ/T6DZLr6Y=";
  };
in
python3Packages.buildPythonApplication rec {
  pname = "wiktextract";
  version = "1.99.7";

  # Source is pinned to an exact commit rather than a tag name.
  src = fetchFromGitHub {
    owner = "tatuylonen";
    repo = "wiktextract";
    rev = "3a3e5746305cf648a0386e089615aa533f68b66d";
    hash = "sha256-iL3mFxX32OaD8UdPdvMyc/ksmeCH4iykM37DgHd+KwE=";
  };

  # Rewrite the `python-Levenshtein` requirement in setup.py to `Levenshtein`.
  # NOTE(review): presumably needed so the requirement matches the distribution
  # name shipped by `python3Packages.levenshtein` - confirm.
  # `--replace-fail` aborts the build if the pattern is not found.
  postPatch = ''
    substituteInPlace setup.py \
      --replace-fail python-Levenshtein Levenshtein
  '';

  # Attribute paths are written out in full so it is clear which inputs come
  # from `python3Packages` and which come from this function's own arguments.
  propagatedBuildInputs = [
    python3Packages.levenshtein
    python3Packages.setuptools
    wikitextprocessor
    python3Packages.nltk
  ];

  nativeCheckInputs = [
    python3Packages.pytestCheckHook
    unzip
  ];

  # Point NLTK at a data directory inside the build tree and unpack the
  # corpus there before the tests run.
  # https://www.nltk.org/data.html#manual-installation
  preCheck = ''
    export NLTK_DATA=$PWD/nltk_data
    mkdir -p nltk_data/corpora
    unzip ${nltkBrownCorpus} -d nltk_data/corpora
  '';

  meta = {
    description = "Wiktionary dump file parser and multilingual data extractor";
    # Reuse the homepage that fetchFromGitHub attaches to `src`.
    homepage = src.meta.homepage;
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.sikmir ];
  };
}