default.nix (1107B)
# Nix package for wiktextract, built as a Python application from the
# upstream GitHub repository at the tag matching `version`.
#
# Arguments:
#   lib               - nixpkgs library (licenses, maintainers).
#   python3Packages   - Python package set providing the builder and Python deps.
#   fetchFromGitHub   - fetcher used for the application source.
#   fetchurl          - fetcher used for the NLTK corpus archive.
#   unzip             - tool used in preCheck to unpack the corpus archive.
#   wikitextprocessor - passed in explicitly rather than taken from python3Packages.
{ lib, python3Packages, fetchFromGitHub, fetchurl, unzip, wikitextprocessor }:

let
  # Archive of the NLTK "brown" corpus. It is unpacked into $NLTK_DATA by
  # preCheck below; presumably the test suite loads it through nltk.
  brown = fetchurl {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip";
    hash = "sha256-mydfmzuV171mzPt80ln0RaE7vl0fQQeroJ/T6DZLr6Y=";
  };
in
python3Packages.buildPythonApplication rec {
  pname = "wiktextract";
  version = "1.99.6";

  src = fetchFromGitHub {
    owner = "tatuylonen";
    repo = "wiktextract";
    rev = "v${version}";
    hash = "sha256-XmpMzsuTzIbzyGNoWRs9twNVQ3kP6c7hXUVHPnR3m+c=";
  };

  # `wikitextprocessor` resolves to the function argument above, not to
  # python3Packages: lexically bound names take precedence over `with`.
  propagatedBuildInputs = with python3Packages; [
    python-Levenshtein
    setuptools
    wikitextprocessor
    nltk
  ];

  # Hooks and executables that run on the build machine during the check
  # phase belong in nativeCheckInputs; with strictDeps, programs listed in
  # checkInputs are not put on PATH, which would break `unzip` in preCheck.
  nativeCheckInputs = [
    python3Packages.pytestCheckHook
    unzip
  ];

  # https://www.nltk.org/data.html#manual-installation
  preCheck = ''
    export NLTK_DATA=$PWD/nltk_data
    mkdir -p nltk_data/corpora
    unzip ${brown} -d nltk_data/corpora
  '';

  meta = {
    description = "Wiktionary dump file parser and multilingual data extractor";
    # Reuse the homepage that fetchFromGitHub attaches to `src`.
    inherit (src.meta) homepage;
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.sikmir ];
  };
}