default.nix (1977B)
# Builds StarDict dictionary files (*.dict*, *.idx, *.ifo) from Tatoeba
# sentence/link tables.
#
# The tables are fetched as .bz2 archives described by ./tatoeba.json,
# decompressed, arranged in the directory layout used below, and then turned
# into one tab-separated file plus one dictionary per entry of `langs`.
{ lib, stdenvNoCC, fetchurl, dict, jq, moreutils, stardict-tools, tatoebatools }:

let
  # Language pairs to build. Each entry is spliced unquoted into the
  # `parallel_corpus` command line, so the shell splits it into two arguments.
  # NOTE(review): presumably "<source> <target>" language codes - confirm
  # against tatoebatools.
  langs = [
    "bel eng"
    "bel rus"
    "deu eng"
    "deu rus"
    "eng bel"
    "eng deu"
    "eng epo"
    "eng fin"
    "eng hye"
    "eng rus"
    "eng swe"
    "eng ukr"
    "epo eng"
    "epo rus"
    "fin eng"
    "fin rus"
    "hye eng"
    "hye rus"
    "rus bel"
    "rus deu"
    "rus eng"
    "rus epo"
    "rus fin"
    "rus hye"
    "rus swe"
    "rus ukr"
    "swe eng"
    "swe rus"
    "ukr eng"
    "ukr rus"
  ];

  # Attribute set read from tatoeba.json; every value is handed to fetchurl
  # unchanged, so each one must be a valid fetchurl argument set.
  tatoeba = builtins.fromJSON (builtins.readFile ./tatoeba.json);
in
stdenvNoCC.mkDerivation rec {
  pname = "tatoeba";
  version = "2024-04-20";

  # One fetched archive per attribute of tatoeba.json (attribute names are
  # not used, only the values).
  srcs = lib.mapAttrsToList (_: spec: fetchurl spec) tatoeba;

  # Custom unpack: decompress every archive into the build directory and
  # record an entry for it in versions.json.
  unpackPhase =
    let
      # Shell snippet for a single archive:
      #   foo.tsv.bz2 -> foo.tsv, plus {"foo": "<version> 00:00:00"} merged
      #   into versions.json (sponge lets jq rewrite the file it is reading).
      unpackSrc = src: ''
        bzcat ${src} > ${lib.removeSuffix ".bz2" src.name}
        jq '.+{"${lib.removeSuffix ".tsv.bz2" src.name}":"${version} 00:00:00"}' versions.json | \
          sponge versions.json
      '';
    in
    ''
      runHook preUnpack

      echo "{}" > versions.json
      ${lib.concatMapStringsSep "\n" unpackSrc srcs}

      runHook postUnpack
    '';

  nativeBuildInputs = [ dict jq moreutils stardict-tools tatoebatools ];

  buildPhase =
    let
      # Shell snippet for one language pair: write the parallel corpus to
      # tatoeba_<a>_<b>.tab and convert that file with stardict-tabfile.
      makeDict = lang:
        let
          tab = "tatoeba_${lib.replaceStrings [ " " ] [ "_" ] lang}.tab";
        in
        ''
          parallel_corpus ${lang} > ${tab}
          stardict-tabfile ${tab}
        '';
    in
    ''
      runHook preBuild

      # NOTE(review): presumably tatoebatools looks for its tables and
      # versions.json under $XDG_DATA_HOME/tatoebatools - confirm.
      export XDG_DATA_HOME=$PWD
      mkdir -p tatoebatools/{links,sentences_detailed}
      mv *_links.tsv tatoebatools/links
      mv *_sentences_detailed.tsv tatoebatools/sentences_detailed
      mv versions.json tatoebatools
      ${lib.concatMapStringsSep "\n" makeDict langs}

      runHook postBuild
    '';

  # All generated dictionary files are installed directly into $out.
  installPhase = ''
    runHook preInstall

    install -Dm644 *.{dict*,idx,ifo} -t "$out"

    runHook postInstall
  '';

  meta = {
    description = "Tatoeba is a collection of sentences and translations";
    homepage = "https://tatoeba.org/";
    license = lib.licenses.free;
    maintainers = [ lib.maintainers.sikmir ];
    platforms = lib.platforms.all;
    skip.ci = true;
  };
}