nur-packages

My NUR packages
git clone git://git.sikmir.ru/nur-packages
Log | Files | Refs | README | LICENSE

commit 1f69645e31b5f38112710e4ffc05c95709659705
parent 772e850cefefecf26b594618e4445dc3511a6f20
Author: Nikolay Korotkiy <sikmir@gmail.com>
Date:   Sat, 12 Dec 2020 17:03:04 +0300

Add tatoeba dicts

Diffstat:
A pkgs/data/dicts/tatoeba/default.nix | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A pkgs/data/dicts/tatoeba/tatoeba.json | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M pkgs/default.nix | 1 +
3 files changed, 140 insertions(+), 0 deletions(-)

diff --git a/pkgs/data/dicts/tatoeba/default.nix b/pkgs/data/dicts/tatoeba/default.nix @@ -0,0 +1,61 @@ +{ stdenvNoCC, lib, fetchurl, dict, jq, moreutils, stardict-tools, tatoebatools }: +let + langs = [ + "deu eng" + "deu rus" + "eng epo" + "eng deu" + "eng fin" + "eng rus" + "epo eng" + "epo rus" + "fin eng" + "fin rus" + "rus deu" + "rus eng" + "rus epo" + "rus fin" + ]; + tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json); +in +stdenvNoCC.mkDerivation rec { + pname = "tatoeba"; + version = "2020-12-05"; + + srcs = lib.mapAttrsToList (name: spec: fetchurl spec) tatoeba; + + unpackPhase = '' + echo "{}" > versions.json + '' + lib.concatMapStringsSep "\n" (src: '' + bzcat ${src} > ${lib.removeSuffix ".bz2" src.name} + jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${version} 00:00:00"}' versions.json | \ + sponge versions.json + '') srcs; + + nativeBuildInputs = [ jq moreutils stardict-tools dict tatoebatools ]; + + buildPhase = let + makeDict = lang: with lib; '' + parallel_corpus ${lang} > tatoeba_${replaceStrings [ " " ] [ "_" ] lang}.tab + stardict-tabfile tatoeba_${replaceStrings [ " " ] [ "_" ] lang}.tab + ''; + in '' + export XDG_DATA_HOME=$PWD + mkdir -p tatoebatools/{links,sentences_detailed} + mv *_links.tsv tatoebatools/links + mv *_sentences_detailed.tsv tatoebatools/sentences_detailed + mv versions.json tatoebatools + ${lib.concatMapStringsSep "\n" makeDict langs} + ''; + + installPhase = "install -Dm644 *.{dict.dz,idx,ifo} -t $out"; + + meta = with lib; { + description = "Tatoeba is a collection of sentences and translations"; + homepage = "https://tatoeba.org/"; + license = licenses.free; + maintainers = with maintainers; [ sikmir ]; + platforms = platforms.all; + skip.ci = true; + }; +} diff --git a/pkgs/data/dicts/tatoeba/tatoeba.json b/pkgs/data/dicts/tatoeba/tatoeba.json @@ -0,0 +1,78 @@ +{ + "deu_sentences_detailed": { + "url": "https://downloads.tatoeba.org/exports/per_language/deu/deu_sentences_detailed.tsv.bz2", + "sha256": 
"119v8pfs1rwm08f3cl162yxb37jqsf7r29kcl9gffmvxdf9bnn2m" + }, + "eng_sentences_detailed": { + "url": "https://downloads.tatoeba.org/exports/per_language/eng/eng_sentences_detailed.tsv.bz2", + "sha256": "0fxzb5mqnzczk5vsjrw4fnmg2g19xja8h4ngj8h5as11ymc5v360" + }, + "epo_sentences_detailed": { + "url": "https://downloads.tatoeba.org/exports/per_language/epo/epo_sentences_detailed.tsv.bz2", + "sha256": "1a3w2cwspafz4g0lkwiq6p4bdwz4xikih579iflax2g4zvk1xnhz" + }, + "fin_sentences_detailed": { + "url": "https://downloads.tatoeba.org/exports/per_language/fin/fin_sentences_detailed.tsv.bz2", + "sha256": "1x7ivh2f5d5nf4cxdlji934rvg60q9sbzg00n4mxsp9axjjsq8vd" + }, + "rus_sentences_detailed": { + "url": "https://downloads.tatoeba.org/exports/per_language/rus/rus_sentences_detailed.tsv.bz2", + "sha256": "1rs0pj0a02b0cb3wh3anhq306s7f30g39ig3kcpw18jscncbbgb8" + }, + "deu-eng_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/deu/deu-eng_links.tsv.bz2", + "sha256": "04wiml623prpna9x7w8r2bl0br0fpar9c0zsqjf8v7jm8hcqnn9g" + }, + "deu-rus_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/deu/deu-rus_links.tsv.bz2", + "sha256": "1lzz9mg19kl4p5gy1fjinlhbkbqc9pgdbmw17z47yf62pyf7hzk5" + }, + "eng-epo_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/eng/eng-epo_links.tsv.bz2", + "sha256": "1k2mg81z5191m2m8p2kr7ghwiakxpjvr8by2gn1fdyi0sh77pfwi" + }, + "eng-deu_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/eng/eng-deu_links.tsv.bz2", + "sha256": "0ag10iv8v829ny6bfdldcw2wzil4q8803h265934ynydmjg5dh54" + }, + "eng-fin_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/eng/eng-fin_links.tsv.bz2", + "sha256": "09rci2696xjp5pjb2w4w0m8njczycy2i9kdri2qzvw5hj9rp1cmh" + }, + "eng-rus_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/eng/eng-rus_links.tsv.bz2", + "sha256": "0lxbz5pfx6f11l24kixsw1w1xy2w1rwmnprm0bfamhzbir1i8lm6" + }, + "epo-eng_links": { + "url": 
"https://downloads.tatoeba.org/exports/per_language/epo/epo-eng_links.tsv.bz2", + "sha256": "0dhgr07wsxnarvd1rqx7l289fhgs1jwr5znjvbwgqbcp3c87cfbd" + }, + "epo-rus_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/epo/epo-rus_links.tsv.bz2", + "sha256": "0zmvxh76vy9ghq6ag8kcng0jf77p07yv37dvhwigv0ngjwi03hmc" + }, + "fin-eng_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/fin/fin-eng_links.tsv.bz2", + "sha256": "07mm92z5d1vdqmchx4ii8q0ndlar76p64cn3m5hhxlzl3z7cgd6p" + }, + "fin-rus_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/fin/fin-rus_links.tsv.bz2", + "sha256": "1ryja7587cmvyh63jcmpnqis7nlwmk6igbllirfsrflyja2q108p" + }, + "rus-deu_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/rus/rus-deu_links.tsv.bz2", + "sha256": "0azrlak6hr0mf6ghkms9ppqy2bcmv82ra6dfyw482n7qg8yjp48k" + }, + "rus-eng_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/rus/rus-eng_links.tsv.bz2", + "sha256": "14fccxghialf18pcxlij37l3gb2ywzqc6mlqjdgxd6m1inayldpk" + }, + "rus-epo_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/rus/rus-epo_links.tsv.bz2", + "sha256": "0rry16qmnpbqhnyminnqafwd0fy4y1rhb1cpwwy25mjlhfbhp0wd" + }, + "rus-fin_links": { + "url": "https://downloads.tatoeba.org/exports/per_language/rus/rus-fin_links.tsv.bz2", + "sha256": "0qx2fvysxxdgazqn2g0jf4n3i2rkz42ih54cw4lnw5ah26qwq0ks" + } +} diff --git a/pkgs/default.nix b/pkgs/default.nix @@ -64,6 +64,7 @@ lib.makeScope newScope ( freedict = callPackage ./data/dicts/freedict { }; huzheng = callPackage ./data/dicts/huzheng { }; it-sanasto = callPackage ./data/dicts/it-sanasto { }; + tatoeba = callPackage ./data/dicts/tatoeba { }; wiktionary = callPackage ./data/dicts/wiktionary { }; gpsmap64 = callPackage ./data/firmwares/gpsmap64 { };