package com.swabunga.spell.engine;

import java.io.*;
import java.util.*;

/**
 * A SpellDictionary implementation that does not cache any words in memory.
 * It avoids the huge footprint of SpellDictionaryHashMap at the cost of a
 * relatively minor lookup latency. A later version could add a caching
 * strategy if there is demand for it.
 *
 * This implementation requires a special dictionary file made of
 * "code*word" lines sorted by code, which it searches with a dichotomy
 * (bisection) algorithm.
 *
 * @author Damien Guillaume
 * @version 0.1
 */
public class SpellDictionaryDichoDisk extends SpellDictionaryASpell {

    /** Random-access handle on the dictionary file; every constructor opens it read-only. */
    private RandomAccessFile dictFile = null;

    /** Encoding of the dictionary and phonetic files; stays null when no encoding is supplied. */
    private String encoding = null;

    /**
     * Dictionary convenience constructor: no phonetic file and no explicit encoding.
     *
     * @param wordList the sorted "code*word" dictionary file, opened read-only
     * @throws FileNotFoundException if the dictionary file cannot be opened for reading
     * @throws IOException declared for I/O failures during construction
     */
    public SpellDictionaryDichoDisk(File wordList) throws FileNotFoundException, IOException {
        this(wordList, (String) null);
    }

    /**
     * Dictionary convenience constructor: no phonetic file, explicit encoding.
     *
     * @param wordList the sorted "code*word" dictionary file, opened read-only
     * @param encoding the encoding remembered for reading dictionary lines
     * @throws FileNotFoundException if the dictionary file cannot be opened for reading
     * @throws IOException declared for I/O failures during construction
     */
    public SpellDictionaryDichoDisk(File wordList, String encoding) throws FileNotFoundException, IOException {
        super((File) null);
        this.encoding = encoding;
        this.dictFile = new RandomAccessFile(wordList, "r");
    }

    /**
     * Dictionary constructor that uses an aspell phonetic file to
     * build the transformation table.
     *
     * @param wordList the sorted "code*word" dictionary file, opened read-only
     * @param phonetic the aspell phonetic file handed to the superclass
     * @throws FileNotFoundException if the dictionary file cannot be opened for reading
     * @throws IOException declared for I/O failures during construction
     */
    public SpellDictionaryDichoDisk(File wordList, File phonetic) throws FileNotFoundException, IOException {
        super(phonetic);
        this.dictFile = new RandomAccessFile(wordList, "r");
    }

    /**
     * Dictionary constructor that uses an aspell phonetic file to
     * build the transformation table, with an explicit encoding.
     *
     * @param wordList the sorted "code*word" dictionary file, opened read-only
     * @param phonetic the aspell phonetic file handed to the superclass
     * @param encoding the encoding handed to the superclass and remembered for
     *                 reading dictionary lines
     * @throws FileNotFoundException if the dictionary file cannot be opened for reading
     * @throws IOException declared for I/O failures during construction
     */
    public SpellDictionaryDichoDisk(File wordList, File phonetic, String encoding) throws FileNotFoundException, IOException {
        super(phonetic, encoding);
        this.encoding = encoding;
        this.dictFile = new RandomAccessFile(wordList, "r");
    }

    /**
     * Add a word permanently to the dictionary (and the dictionary file).
* not implemented ! */ public void addWord(String word) { System.err.println("error: addWord is not implemented for SpellDictionaryDichoDisk"); } /** * Search the dictionary file for the words corresponding to the code * within positions p1 - p2 */ private LinkedList dichoFind(String code, long p1, long p2) throws IOException { //System.out.println("dichoFind("+code+","+p1+","+p2+")"); long pm = (p1 + p2) / 2; dictFile.seek(pm); String l; if (encoding == null) l = dictFile.readLine(); else l = dictReadLine(); pm = dictFile.getFilePointer(); if (encoding == null) l = dictFile.readLine(); else l = dictReadLine(); long pm2 = dictFile.getFilePointer(); if (pm2 >= p2) return(seqFind(code, p1, p2)); int istar = l.indexOf('*'); if (istar == -1) throw new IOException("bad format: no * !"); String testcode = l.substring(0, istar); int comp = code.compareTo(testcode); if (comp < 0) return(dichoFind(code, p1, pm-1)); else if (comp > 0) return(dichoFind(code, pm2, p2)); else { LinkedList l1 = dichoFind(code, p1, pm-1); LinkedList l2 = dichoFind(code, pm2, p2); String word = l.substring(istar+1); l1.add(word); l1.addAll(l2); return(l1); } } private LinkedList seqFind(String code, long p1, long p2) throws IOException { //System.out.println("seqFind("+code+","+p1+","+p2+")"); LinkedList list = new LinkedList(); dictFile.seek(p1); while (dictFile.getFilePointer() < p2) { String l; if (encoding == null) l = dictFile.readLine(); else l = dictReadLine(); int istar = l.indexOf('*'); if (istar == -1) throw new IOException("bad format: no * !"); String testcode = l.substring(0, istar); if (testcode.equals(code)) { String word = l.substring(istar+1); list.add(word); } } return(list); } /** * Read a line of dictFile with a specific encoding */ private String dictReadLine() throws IOException { int max = 255; byte b=0; byte[] buf = new byte[max]; int i=0; try { for (; b != '\n' && b != '\r' && i