package com.swabunga.spell.engine;
import java.io.*;
import java.util.*;
/**
* Another implementation of <code>SpellDictionary</code> that doesn't cache any words in memory.
* Avoids the huge footprint of <code>SpellDictionaryHashMap</code> at the cost of relatively
* minor latency. A future version of this class that implements some caching strategies
* might be a good idea, if there's any demand for it.
*
* This implementation requires a special dictionary file, with "code*word" lines sorted by code.
* It uses a dichotomy (binary search) algorithm to look up words in the dictionary.
*
* @author Damien Guillaume
* @version 0.1
*/
public class SpellDictionaryDichoDisk extends SpellDictionaryASpell {
/** Random-access handle on the dictionary file; every constructor opens it read-only ("r"). */
private RandomAccessFile dictFile = null;
/**
 * Encoding of the dictionary and phonetic files. When this is null, lines are
 * read with RandomAccessFile.readLine(); otherwise dictReadLine() is used.
 */
private String encoding = null;
/**
 * Dictionary convenience constructor: uses only a word list, with a null
 * phonetic file handed to the superclass and no explicit encoding.
 *
 * @param wordList the sorted "code*word" dictionary file, opened read-only
 * @throws FileNotFoundException if the word list cannot be opened for reading
 * @throws IOException declared for callers; may propagate from the superclass constructor
 */
public SpellDictionaryDichoDisk(File wordList)
    throws FileNotFoundException, IOException {
    // Same steps as the (File, String) constructor with the encoding left null.
    this(wordList, (String) null);
}
/**
 * Dictionary convenience constructor: uses a word list read with the given
 * encoding, with a null phonetic file handed to the superclass.
 *
 * @param wordList the sorted "code*word" dictionary file, opened read-only
 * @param encoding the encoding used when reading lines of the dictionary file
 * @throws FileNotFoundException if the word list cannot be opened for reading
 * @throws IOException declared for callers; may propagate from the superclass constructor
 */
public SpellDictionaryDichoDisk(File wordList, String encoding)
    throws FileNotFoundException, IOException {
    super((File) null);
    this.dictFile = new RandomAccessFile(wordList, "r");
    this.encoding = encoding;
}
/**
 * Dictionary constructor that hands an aspell phonetic file to the
 * superclass (to build the transformation table) and reads the word list
 * without an explicit encoding.
 *
 * @param wordList the sorted "code*word" dictionary file, opened read-only
 * @param phonetic the aspell phonetic file passed to the superclass
 * @throws FileNotFoundException if the word list cannot be opened for reading
 * @throws IOException declared for callers; may propagate from the superclass constructor
 */
public SpellDictionaryDichoDisk(File wordList, File phonetic)
    throws FileNotFoundException, IOException {
    super(phonetic);
    this.dictFile = new RandomAccessFile(wordList, "r");
}
/**
 * Dictionary constructor that hands an aspell phonetic file and its encoding
 * to the superclass (to build the transformation table); the same encoding
 * is used when reading the word list.
 *
 * @param wordList the sorted "code*word" dictionary file, opened read-only
 * @param phonetic the aspell phonetic file passed to the superclass
 * @param encoding the encoding of the dictionary and phonetic files
 * @throws FileNotFoundException if the word list cannot be opened for reading
 * @throws IOException declared for callers; may propagate from the superclass constructor
 */
public SpellDictionaryDichoDisk(File wordList, File phonetic, String encoding)
    throws FileNotFoundException, IOException {
    super(phonetic, encoding);
    this.dictFile = new RandomAccessFile(wordList, "r");
    this.encoding = encoding;
}
/**
 * Adding a word permanently to the dictionary (and the dictionary file) is
 * not implemented: the call only prints an error message on standard error.
 *
 * @param word the word that was requested to be added (ignored)
 */
public void addWord(String word) {
    final String message = "error: addWord is not implemented for SpellDictionaryDichoDisk";
    System.err.println(message);
}
/**
 * Dichotomy (binary) search of the dictionary file for the words whose code
 * equals the given one, restricted to file positions p1 - p2.
 *
 * The method seeks to the middle of the range, discards the line it lands
 * in, and compares the code of the following line. Depending on the
 * comparison it recurses into the lower part, the upper part, or both (when
 * the code matches, equal codes may sit on either side). Once the examined
 * line ends at or beyond p2, the range is scanned with seqFind instead.
 *
 * @param code the code to look for
 * @param p1 start position of the range in the dictionary file
 * @param p2 end position of the range in the dictionary file
 * @return the words found for the code, in file order
 * @throws IOException on read errors, or if the examined line contains no '*'
 */
private LinkedList dichoFind(String code, long p1, long p2) throws IOException {
    dictFile.seek((p1 + p2) / 2);

    // The seek usually lands inside a line: read and drop it so that the
    // next read starts on a line boundary.
    if (encoding == null) {
        dictFile.readLine();
    } else {
        dictReadLine();
    }
    final long lineStart = dictFile.getFilePointer();
    final String line = (encoding == null) ? dictFile.readLine() : dictReadLine();
    final long lineEnd = dictFile.getFilePointer();

    // The range cannot be split any further: scan it line by line.
    if (lineEnd >= p2) {
        return seqFind(code, p1, p2);
    }

    final int star = line.indexOf('*');
    if (star == -1) {
        throw new IOException("bad format: no * !");
    }

    final int order = code.compareTo(line.substring(0, star));
    if (order < 0) {
        return dichoFind(code, p1, lineStart - 1);
    }
    if (order > 0) {
        return dichoFind(code, lineEnd, p2);
    }

    // Exact match: collect matches before this line, this line's word, then
    // matches after it.
    final LinkedList found = dichoFind(code, p1, lineStart - 1);
    final LinkedList following = dichoFind(code, lineEnd, p2);
    found.add(line.substring(star + 1));
    found.addAll(following);
    return found;
}
/**
 * Sequential search of the dictionary file: starting at position p1, reads
 * line after line while the file pointer is below p2 and collects the word
 * of every line whose code equals the given one.
 *
 * @param code the code to look for
 * @param p1 position in the dictionary file where the scan starts
 * @param p2 position in the dictionary file at which the scan stops
 * @return the words found for the code, in file order (possibly empty)
 * @throws IOException on read errors, or if a line contains no '*'
 */
private LinkedList seqFind(String code, long p1, long p2) throws IOException {
    final LinkedList matches = new LinkedList();
    dictFile.seek(p1);
    while (dictFile.getFilePointer() < p2) {
        final String line = (encoding == null) ? dictFile.readLine() : dictReadLine();
        final int star = line.indexOf('*');
        if (star == -1) {
            throw new IOException("bad format: no * !");
        }
        if (!line.substring(0, star).equals(code)) {
            continue;
        }
        matches.add(line.substring(star + 1));
    }
    return matches;
}
/**
* Read a line of dictFile with a specific encoding
*/
private String dictReadLine() throws IOException {
int max = 255;
byte b=0;
byte[] buf = new byte[max];
int i=0;
try {
for (; b != '\n' && b != '\r' && i