default.nix (1921B)
# Builds StarDict dictionary files from Tatoeba TSV exports, one dictionary
# per language pair listed in `langs`.
#
# The sources to download are described in ./tatoeba.json (one `fetchurl`
# argument set per attribute); all other arguments are build-time tools.
{ lib, stdenvNoCC, fetchurl, dict, jq, moreutils, stardict-tools, tatoebatools }:
let
  # Language pairs to build. Each entry is passed verbatim as the arguments of
  # `parallel_corpus`; with the space replaced by "_" it also names the output.
  langs = [
    "deu eng"
    "deu rus"
    "eng deu"
    "eng epo"
    "eng fin"
    "eng hye"
    "eng rus"
    "eng swe"
    "eng ukr"
    "epo eng"
    "epo rus"
    "fin eng"
    "fin rus"
    "hye eng"
    "hye rus"
    "rus deu"
    "rus eng"
    "rus epo"
    "rus fin"
    "rus hye"
    "rus swe"
    "rus ukr"
    "swe eng"
    "swe rus"
    "ukr eng"
    "ukr rus"
  ];

  # Attribute set decoded from ./tatoeba.json; every value is handed to
  # `fetchurl` unchanged.
  tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json);
in
stdenvNoCC.mkDerivation rec {
  pname = "tatoeba";
  version = "2023-05-27";

  # One fetched file per attribute of ./tatoeba.json (attribute names unused).
  srcs = lib.mapAttrsToList (_: spec: fetchurl spec) tatoeba;

  # Decompress every source into the build directory and record it in
  # versions.json as "<name without .tsv.bz2>": "<version> 00:00:00".
  # NOTE(review): versions.json is presumably what tatoebatools consults to
  # treat the local tables as up to date — confirm against tatoebatools.
  unpackPhase =
    let
      unpackSrc = src: ''
        bzcat ${src} > ${lib.removeSuffix ".bz2" src.name}
        jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${version} 00:00:00"}' versions.json | \
          sponge versions.json
      '';
    in
    ''
      runHook preUnpack

      echo "{}" > versions.json
      ${lib.concatMapStringsSep "\n" unpackSrc srcs}

      runHook postUnpack
    '';

  nativeBuildInputs = [ dict jq moreutils stardict-tools tatoebatools ];

  # Lay the unpacked tables out under ./tatoebatools, then, for each language
  # pair, write a tab-separated corpus and convert it with stardict-tabfile.
  buildPhase =
    let
      makeDict = lang:
        let
          # e.g. "deu eng" -> "tatoeba_deu_eng.tab"
          tabFile = "tatoeba_${lib.replaceStrings [ " " ] [ "_" ] lang}.tab";
        in
        ''
          parallel_corpus ${lang} > ${tabFile}
          stardict-tabfile ${tabFile}
        '';
    in
    ''
      runHook preBuild

      # NOTE(review): presumably makes tatoebatools look for its data in
      # $PWD/tatoebatools instead of the user's data directory — confirm.
      export XDG_DATA_HOME=$PWD
      mkdir -p tatoebatools/{links,sentences_detailed}
      mv *_links.tsv tatoebatools/links
      mv *_sentences_detailed.tsv tatoebatools/sentences_detailed
      mv versions.json tatoebatools
      ${lib.concatMapStringsSep "\n" makeDict langs}

      runHook postBuild
    '';

  # Install the generated dictionary files directly into $out.
  installPhase = ''
    runHook preInstall

    install -Dm644 *.{dict*,idx,ifo} -t $out

    runHook postInstall
  '';

  meta = with lib; {
    description = "Tatoeba is a collection of sentences and translations";
    homepage = "https://tatoeba.org/";
    license = licenses.free;
    maintainers = with maintainers; [ sikmir ];
    platforms = platforms.all;
    skip.ci = true;
  };
}