Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
petrkle committed Jun 12, 2016
0 parents commit ac487d3
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Dictionary extension archives fetched by download.sh (every URL there ends in .oxt).
*.oxt
# Dictionary directories that dump.sh reads its .dic/.aff files from.
en_us
dict-*
# Word-form lists written by dump.sh, including its intermediate *.txt.tmp files.
wordforms-*
13 changes: 13 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Build helper for the word-form dictionaries.
#
# None of the targets below produces a file named after itself, so they are
# declared phony: otherwise a stray file called "help", "dump" or "clean"
# would make the corresponding target look up to date and it would never run.
.PHONY: help dump clean

# Print a short description of the available targets (first target, so it is
# also what a bare `make` runs).
help:
	@echo "help - napoveda"
	@echo "dump - dump slovniku"
	@echo "clean - smaze stazene a generovane soubory"

# Run the whole pipeline. Every step is its own recipe line, so make stops at
# the first script that exits with a non-zero status.
dump:
	./download.sh
	./unpack.sh
	./dump.sh
	./pack.sh

# Remove the downloaded archives, the dictionary directories and the
# generated word-form files.
clean:
	rm -rf *.oxt en_us* dict-* wordforms-*
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CZ a SK wordforms slovníky pro Sphinx search.
10 changes: 10 additions & 0 deletions download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Download the dictionary extension archives (.oxt) into the current
# directory. wget runs with --no-clobber, so the script can be re-run safely.
#
# Exits non-zero as soon as one download fails, so that callers such as
# `make dump` do not continue with an incomplete set of archives.

set -euo pipefail

readonly urls=(
  http://downloads.sourceforge.net/project/aoo-extensions/1078/0/dict-cs-2.0.oxt
  http://downloads.sourceforge.net/project/aoo-extensions/1143/11/dict-sk.oxt
  http://downloads.sourceforge.net/project/aoo-extensions/1470/1/en_us.oxt
  http://downloads.sourceforge.net/project/aoo-extensions/1317/0/dict-en-us-names-2008-10-12.oxt
)

for url in "${urls[@]}"; do
  wget --no-clobber "$url"
done
16 changes: 16 additions & 0 deletions dump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Dump word forms from the dictionaries with spelldump, normalise them and
# write the final wordforms-*.txt files.
#
# Must be run from the directory that contains the dictionary directories
# and ./uniq.php; all paths below are relative to the working directory.

# Abort on the first failing command or unset variable.
# NOTE(review): pipefail is deliberately left off - iconv with //TRANSLIT may
# report a non-zero status for lossy conversions while still producing usable
# output; confirm before enabling it.
set -eu

#######################################
# Normalise one spelldump output file into "<file>.tmp": optionally convert
# it to transliterated ASCII, lowercase it, sort it and drop duplicate lines.
# Arguments: $1 - input file
#            $2 - source encoding for iconv (optional; when omitted the file
#                 is passed through without any charset conversion)
# Outputs:   writes "$1.tmp"
#######################################
normalize() {
  local src=$1
  local encoding=${2:-}

  if [[ -n "$encoding" ]]; then
    iconv -f "$encoding" -t ASCII//TRANSLIT "$src"
  else
    cat "$src"
  fi \
    | tr '[:upper:]' '[:lower:]' \
    | sort -u > "$src.tmp"
}

spelldump en_us/en_US.dic en_us/en_US.aff wordforms-en.txt
spelldump dict-en-us-names-2008-10-12/en_USNames.dic dict-en-us-names-2008-10-12/en_USNames.aff wordforms-en-names.txt
spelldump dict-cs-2.0/cs_CZ.dic dict-cs-2.0/cs_CZ.aff wordforms-cz.txt
spelldump dict-sk/sk_SK/sk_SK.dic dict-sk/sk_SK/sk_SK.aff wordforms-sk.txt

normalize wordforms-cz.txt ISO-8859-2
normalize wordforms-sk.txt
normalize wordforms-en.txt ISO-8859-1
normalize wordforms-en-names.txt ISO-8859-1

# Replace every dump with its filtered version. The temporary file is removed
# only after uniq.php succeeded; on failure `set -e` stops the script and the
# .tmp file is left behind for inspection.
for tmp in wordforms-*.txt.tmp; do
  ./uniq.php "$tmp" > "${tmp%.tmp}"
  rm "$tmp"
done
3 changes: 3 additions & 0 deletions pack.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
#
# Bundle the wordforms-*.txt files into sphinxsearch-wordforms.tar.gz, which
# is created in the parent directory.
#
# The archive is built from one level up so that its entries carry the
# "sphinxsearch-wordforms/" prefix; this only works when the script is started
# from inside a directory with exactly that name.

# Leave with cd's own status if the parent directory cannot be entered.
cd .. || exit

tar -c -z -f sphinxsearch-wordforms.tar.gz sphinxsearch-wordforms/wordforms-*.txt
16 changes: 16 additions & 0 deletions uniq.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/php -d memory_limit=2G
<?php
/**
 * Filter a word-form list given as "form > base" lines.
 *
 * Usage: uniq.php <file>
 *
 * - When the same form occurs on several lines, the last mapping wins, while
 *   the entry keeps the position of its first occurrence.
 * - Entries that map a form to itself are dropped.
 * - Blank lines and lines without the " > " separator are skipped.
 *
 * The result is printed to standard output. The exit status is 1 when the
 * argument is missing or the file cannot be read.
 *
 * NOTE(review): on Linux everything after the interpreter path in the shebang
 * is handed over as one single argument - confirm that the memory_limit
 * override is really applied when the script is executed directly.
 */

if (!isset($argv[1])) {
    fwrite(STDERR, "Usage: {$argv[0]} <file>\n");
    exit(1);
}

$lines = file($argv[1]);
if ($lines === false) {
    fwrite(STDERR, "Cannot read {$argv[1]}\n");
    exit(1);
}

$forms = array();
foreach ($lines as $line) {
    $parts = explode(' > ', trim($line));
    if (count($parts) < 2) {
        // Blank or malformed line: there is no base form to map to.
        continue;
    }
    $forms[$parts[0]] = $parts[1];
}

foreach ($forms as $form => $base) {
    // PHP turns integer-like array keys into ints, and a loose != would
    // compare numeric-looking strings as numbers ("1e3" == "1000"), so the
    // key is cast back to a string and compared strictly.
    if ((string) $form !== $base) {
        print "$form > $base\n";
    }
}
6 changes: 6 additions & 0 deletions unpack.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# Hand every .oxt archive in the current directory to `dtrx --overwrite`.
#
# Exits non-zero when no archive is present or when an extraction fails, so
# that callers such as `make dump` stop instead of continuing without the
# dictionaries.

set -euo pipefail

# Without nullglob an unmatched pattern would stay in place and dtrx would be
# called with the literal string "*.oxt".
shopt -s nullglob
archives=(*.oxt)

if (( ${#archives[@]} == 0 )); then
  printf 'unpack.sh: no .oxt archives found in %s\n' "$PWD" >&2
  exit 1
fi

for archive in "${archives[@]}"; do
  dtrx --overwrite "$archive"
done

0 comments on commit ac487d3

Please sign in to comment.