/**
* MailArchiver is an application that provides services for storing and managing e-mail messages through a Web Services SOAP interface.
* Copyright (C) 2012 Marcio Andre Scholl Levien and Fernando Alberto Reuter Wendt and Jose Ronaldo Nogueira Fonseca Junior
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/******************************************************************************\
*
* This product was developed by
*
* SERVIÇO FEDERAL DE PROCESSAMENTO DE DADOS (SERPRO),
*
* a government company established under Brazilian law (5.615/70),
* at Department of Development of Porto Alegre.
*
\******************************************************************************/
package serpro.mailarchiver.service.web;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Date;
import java.util.Map.Entry;
import java.util.Stack;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.jdo.annotations.PersistenceAware;
import org.apache.james.mime4j.codec.DecodeMonitor;
import org.apache.james.mime4j.codec.DecoderUtil;
import org.apache.james.mime4j.io.LineNumberInputStream;
import org.apache.james.mime4j.message.SimpleContentHandler;
import org.apache.james.mime4j.parser.MimeStreamParser;
import org.apache.james.mime4j.stream.BodyDescriptor;
import org.apache.james.mime4j.stream.MimeConfig;
import org.apache.james.mime4j.util.CharsetUtil;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.springframework.beans.factory.annotation.Autowired;
import com.google.common.base.Supplier;
import com.google.common.io.Files;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.TextExtractor;
import serpro.mailarchiver.domain.metaarchive.*;
import serpro.mailarchiver.service.BaseService;
import serpro.mailarchiver.service.dto.TMessage;
import serpro.mailarchiver.service.find.FFolder;
import serpro.mailarchiver.service.find.FMessage;
import serpro.mailarchiver.session.Session;
import serpro.mailarchiver.util.Charsets;
import serpro.mailarchiver.util.Logger;
import serpro.mailarchiver.util.jdo.PersistenceManager;
import serpro.mailarchiver.util.transaction.WithReadWriteTx;
@PersistenceAware
public class DefaultArchiveOperation
extends BaseService
implements ArchiveOperation
{
/**
 * Internal signal thrown by {@code MessageHandler.headers()} when the root header block
 * carries an X-Mozilla-Status value with the expunged flag set. It is caught in
 * {@code apply()}, which turns it into an "expunged message" service fault.
 */
private static class ExpungedException extends RuntimeException {}
/**
 * Internal signal thrown by {@code MessageHandler.headers()} when a message with the same
 * computed id is already stored in the destination folder. It is caught in {@code apply()},
 * which then returns the message the handler found instead of archiving it again.
 */
private static class RepeatedException extends RuntimeException {}
// Class-local logger obtained from the project's Logger factory.
private static final Logger log = Logger.getLocalLogger();
// Line-break constants written with octal escapes: \015 is carriage return (13), \012 is line feed (10).
private static final String CR = "\015";
private static final String LF = "\012";
private static final String CRLF = "\015\012";
// Injected finder; apply() uses it to look up the destination folder by id.
@Autowired
private FFolder findFolder;
// Injected finder; MessageHandler.headers() uses it to look up an already archived message by its computed id.
@Autowired
private FMessage findMessage;
/**
 * Archives a raw e-mail message into the folder identified by {@code folderId}.
 *
 * <p>The message text is converted to Windows-1252 bytes, terminated with a line feed if
 * necessary and parsed with a lenient mime4j configuration. The root message produced by
 * the handler is attached to the folder, flagged as unseen and unanswered, given an
 * importance flag derived from its "importance" header, made persistent, written to
 * {@code <oid>.eml} under the folder's absolute path and added to the Lucene index.
 *
 * <p>When the handler signals that the same message is already stored in this folder,
 * nothing is archived and the message found by the handler is returned instead.
 *
 * @param folderId id of the destination folder
 * @param message  complete raw message text
 * @return a DTO built from the archived (or previously archived) root message
 * @throws ServiceFault raised through the {@code ServiceFault} builders when the folder
 *         does not exist, when the message is flagged as expunged, or when any other
 *         failure occurs while archiving (presumably thrown by {@code raise()})
 */
@WithReadWriteTx
@Override
public TMessage apply(String folderId, String message) throws ServiceFault {
    PersistenceManager pm = getPersistenceManager();

    Folder folder = findFolder.byId(folderId);
    if(folder == null) {
        ServiceFault.folderNotFound()
            .setActor("archive")
            .setMessage("Destination folder not found.")
            .addValue("folderId", folderId)
            .raise();
    }

    // Non-strict parsing with every mime4j limit set to -1 (presumably "no limit" - see MimeConfig).
    MimeConfig config = new MimeConfig();
    config.setStrictParsing(false);
    config.setMaxLineLen(-1);
    config.setMaxContentLen(-1);
    config.setMaxHeaderLen(-1);
    config.setMaxHeaderCount(-1);

    Message rootMessage = null;
    MessageHandler handler = null;

    try {
        handler = new MessageHandler(folder);

        MimeStreamParser parser = new MimeStreamParser(config);
        parser.setContentHandler(handler);
        parser.setContentDecoding(true);

        // The same normalized bytes are both parsed and written to the .eml file below.
        byte[] ba = terminateWithLineBreak(message.getBytes(Charsets.Windows_1252));
        parser.parse(new ByteArrayInputStream(ba));

        rootMessage = handler.getRootMessage();
        rootMessage.sync();
        rootMessage.setFolder(folder);
        rootMessage.setUnseen(true);
        rootMessage.setUnanswered(true);
        applyImportanceHeader(rootMessage);

        pm.makePersistent(rootMessage);

        Path msgFile = folder.getAbsolutePath().resolve(rootMessage.getOid() + ".eml");
        Files.write(ba, msgFile.toFile());

        Document luceneDoc = handler.getLuceneDoc();
        Session.getLuceneIndex().addMessage(luceneDoc);

        // The tree dump is wrapped in a Supplier so it is only built if the logger asks for it.
        final Message rootMsg = rootMessage;
        log.info("Mensagem arquivada com sucesso:%n%n%1$s", new Supplier<Object[]>() {
            @Override
            public Object[] get() {
                String msgDOM = rootMsg.dumpTree();
                return new Object[] { msgDOM };
            }
        });
    }
    catch(ExpungedException e) {
        // The handler found the expunged flag in the root headers.
        ServiceFault.expungedMessage()
            .setActor("archive")
            .raise();
    }
    catch(RepeatedException e) {
        // Already archived in this folder: hand back the message the handler looked up.
        rootMessage = handler.getRootMessage();
    }
    catch(Throwable t) {
        ServiceFault.archiveFailure()
            .setActor("archive")
            .setMessage("Archive failure.")
            .setCause(t)
            .raise();
    }

    return new TMessage(rootMessage);
}

/**
 * Returns the message bytes ending in a line feed: unchanged when they already end in LF,
 * with LF appended after a trailing CR, or with CRLF appended otherwise.
 *
 * @throws IllegalArgumentException if {@code raw} is empty (previously this surfaced as an
 *         ArrayIndexOutOfBoundsException; either way apply() reports an archive failure)
 */
private static byte[] terminateWithLineBreak(byte[] raw) {
    if(raw.length == 0) {
        throw new IllegalArgumentException("Message is empty.");
    }
    byte[] terminated;
    switch(raw[raw.length - 1]) {
        case 0x0A: //LF
            terminated = raw;
            break;
        case 0x0D: //CR
            terminated = Arrays.copyOf(raw, raw.length + 1);
            terminated[terminated.length - 1] = 0x0A;
            break;
        default:
            terminated = Arrays.copyOf(raw, raw.length + 2);
            terminated[terminated.length - 2] = 0x0D;
            terminated[terminated.length - 1] = 0x0A;
            break;
    }
    return terminated;
}

/**
 * Sets the high/low/normal importance flag of {@code msg} from its "importance" header;
 * does nothing when the header is absent.
 */
private static void applyImportanceHeader(Message msg) {
    UnstructuredField importanceField = (UnstructuredField) msg.getField("importance");
    if(importanceField == null) {
        return;
    }
    // A header stored without text is treated like any unrecognized value (normal importance).
    String text = importanceField.getText();
    // Locale.ROOT keeps the comparison independent of the JVM default locale
    // (with a Turkish default locale "HIGH" would lowercase to a dotless-i form and never match).
    String importance = (text == null) ? "" : text.toLowerCase(java.util.Locale.ROOT).trim();
    if("high".equals(importance)) {
        msg.setImportanceHigh(true);
    }
    else if("low".equals(importance)) {
        msg.setImportanceLow(true);
    }
    else {
        msg.setImportanceNormal(true);
    }
}
private class MessageHandler extends SimpleContentHandler {
// Bit tested against the hexadecimal X-Mozilla-Status header value in headers() to detect expunged messages.
private static final int MSG_FLAG_EXPUNGED = 0x0008;
// Entities currently open during parsing: the root message, embedded messages and body parts are pushed here,
// and the top element is used as the owner of newly created fields, bodies and multiparts.
// NOTE(review): declared as a raw Stack here; the element type is not visible in this view - confirm.
private Stack entityStack;
// Multiparts currently open; startMultipart() pushes, preamble() and startBodyPart() read the top element.
// NOTE(review): preamble() assigns peek() directly to a Multipart, which suggests Stack<Multipart> - confirm.
private Stack multipartStack;
// Destination folder; used in headers() for the message digest and for the repeated-message check.
private final Folder folder;
// Root message: either created in headers() or the already stored message found there.
private Message rootMessage;
// Parser entity of the root message, remembered by startMessage() until headers() creates the root Message.
private LineNumberInputStream.Entity rootLnisEntity;
// Lucene document being filled for the root message; created in headers() together with a new root message.
private Document luceneDoc;
// True while headers() is processing the root header block; gates the additions to luceneDoc.
private boolean parsingRoot;
/**
 * Returns the root message held by this handler. It is assigned in {@code headers()} while
 * the root header block is processed and may be a message that was already stored.
 */
Message getRootMessage() {
    return this.rootMessage;
}
/**
 * Returns the Lucene document filled for the root message. It is created in
 * {@code headers()} at the same time as a new root message.
 */
Document getLuceneDoc() {
    return this.luceneDoc;
}
/**
 * Creates a handler for a message that is to be archived into {@code folder},
 * starting with empty entity and multipart stacks.
 *
 * @param folder destination folder of the message being parsed
 */
public MessageHandler(Folder folder) {
    this.entityStack = new Stack<>();
    this.multipartStack = new Stack<>();
    this.folder = folder;
}
/**
 * Adds a stored, non-analyzed field to the Lucene document of the root message,
 * indexed with {@code IndexOptions.DOCS_ONLY}.
 *
 * <p>Used for: message_id.
 *
 * @param name  index field name
 * @param value index field value
 */
private void addStoredNotAnalyzedIndexField(String name, String value) {
    final org.apache.lucene.document.Field.Store storage =
            org.apache.lucene.document.Field.Store.YES;
    final org.apache.lucene.document.Field.Index indexing =
            org.apache.lucene.document.Field.Index.NOT_ANALYZED;

    final org.apache.lucene.document.Field indexField =
            new org.apache.lucene.document.Field(name, value, storage, indexing);
    indexField.setIndexOptions(IndexOptions.DOCS_ONLY);

    luceneDoc.add(indexField);
}
/**
 * Adds a non-stored, non-analyzed field to the Lucene document of the root message,
 * indexed with {@code IndexOptions.DOCS_ONLY}.
 *
 * <p>Used for: date, from_mbox, sender_mbox, to_mbox, cc_mbox, bcc_mbox.
 *
 * @param name  index field name
 * @param value index field value
 */
private void addNotAnalyzedIndexField(String name, String value) {
    final org.apache.lucene.document.Field.Store storage =
            org.apache.lucene.document.Field.Store.NO;
    final org.apache.lucene.document.Field.Index indexing =
            org.apache.lucene.document.Field.Index.NOT_ANALYZED;

    final org.apache.lucene.document.Field indexField =
            new org.apache.lucene.document.Field(name, value, storage, indexing);
    indexField.setIndexOptions(IndexOptions.DOCS_ONLY);

    luceneDoc.add(indexField);
}
/**
 * Adds a non-stored, analyzed field to the Lucene document of the root message,
 * indexed with {@code IndexOptions.DOCS_ONLY}.
 *
 * <p>Used for: subject, from, sender, to, cc, bcc, body.
 *
 * @param name  index field name
 * @param value index field value
 */
private void addAnalyzedIndexField(String name, String value) {
    final org.apache.lucene.document.Field.Store storage =
            org.apache.lucene.document.Field.Store.NO;
    final org.apache.lucene.document.Field.Index indexing =
            org.apache.lucene.document.Field.Index.ANALYZED;

    final org.apache.lucene.document.Field indexField =
            new org.apache.lucene.document.Field(name, value, storage, indexing);
    indexField.setIndexOptions(IndexOptions.DOCS_ONLY);

    luceneDoc.add(indexField);
}
/**
 * Passes {@code text} through mime4j's {@code DecoderUtil.decodeEncodedWords} and returns
 * the result. Problems reported to the monitor are written to the log as warnings.
 *
 * @param text header text that may contain encoded words
 * @return the text as returned by the decoder
 */
private String decodeEncodedWords(String text) {
    final DecodeMonitor loggingMonitor = new DecodeMonitor() {
        @Override
        public boolean warn(String error, String dropDesc) {
            log.warn("error: %s\ndropDesc: %s", error, dropDesc);
            // false: presumably tells the decoder to carry on rather than abort - confirm against DecodeMonitor.
            return false;
        }
    };
    // NOTE(review): only warn() is overridden; if the mime4j version in use consults
    // DecodeMonitor.isListening() before calling warn(), some warnings may never reach the log - confirm.
    return DecoderUtil.decodeEncodedWords(text, loggingMonitor);
}
/**
 * Parser callback for the start of a message.
 *
 * <p>With an empty entity stack this is the root message: only its parser entity is
 * remembered, because pushing the root message is deferred until its first header block
 * has been read, so that an expunged message can be detected first. Otherwise the message
 * is embedded in the entity on top of the stack: a message body is attached to that
 * entity, and a new embedded message is attached to the body and pushed.
 *
 * @param lnisEntity parser entity describing the message that starts
 */
@Override
public void startMessage(LineNumberInputStream.Entity lnisEntity) {
    if(!entityStack.empty()) {
        final MessageBody body = new MessageBody();
        body.setEntity(entityStack.peek());

        final EmbeddedMessage embedded = new EmbeddedMessage(lnisEntity);
        embedded.setComposite(body);
        entityStack.push(embedded);

        // The path dump is produced lazily through the Supplier.
        log.debug("Mime Parse Event: START EMBEDDED MESSAGE\n\n%s", new Supplier<Object[]>() {
            @Override
            public Object[] get() {
                return new Object[] { embedded.dumpPath() };
            }
        });
    }
    else {
        rootLnisEntity = lnisEntity;
    }
}
/**
 * Parser callback for the start of a multipart body: creates a multipart owned by the
 * entity on top of the entity stack and pushes it onto the multipart stack.
 *
 * @param bd body descriptor supplied by the parser (not used here)
 */
@Override
public void startMultipart(BodyDescriptor bd) {
    final Multipart container = new Multipart();
    container.setEntity(entityStack.peek());
    multipartStack.push(container);

    // The path dump is produced lazily through the Supplier.
    log.debug("Mime Parse Event: START MULTIPART\n\n%s", new Supplier<Object[]>() {
        @Override
        public Object[] get() {
            return new Object[] { container.dumpPath() };
        }
    });
}
/**
 * Parser callback for a multipart preamble: hands the preamble stream to the multipart
 * on top of the multipart stack.
 *
 * @param is stream over the preamble content
 */
@Override
public void preamble(InputStream is) {
    final Multipart enclosing = multipartStack.peek();
    enclosing.setPreambleStream(is);
}
/**
 * Parser callback for the start of a body part: creates a body part inside the multipart
 * on top of the multipart stack and pushes it onto the entity stack.
 *
 * @param lnisEntity parser entity describing the body part that starts
 */
@Override
public void startBodyPart(LineNumberInputStream.Entity lnisEntity) {
    final BodyPart part = new BodyPart(lnisEntity);
    part.setComposite(multipartStack.peek());
    entityStack.push(part);

    // The path dump is produced lazily through the Supplier.
    log.debug("Mime Parse Event: START BODY PART\n\n%s", new Supplier<Object[]>() {
        @Override
        public Object[] get() {
            return new Object[] { part.dumpPath() };
        }
    });
}
@Override
public void headers(org.apache.james.mime4j.dom.Header hd) {
if(entityStack.empty()) {
parsingRoot = true;
//o empilhamento da mensagem raiz é postergado até a leitura do primeiro header para
//verificar se a mensagem está marcada como expungida pelo Thunderbird, isto é,
//se a mesma foi apagada ou movida de uma pasta que não foi compactada após este evento
org.apache.james.mime4j.dom.field.UnstructuredField field =
(org.apache.james.mime4j.dom.field.UnstructuredField) hd.getField("X-Mozilla-Status");
if(field != null) {
if((Integer.parseInt(field.getValue(), 16) & MSG_FLAG_EXPUNGED) > 0) {
log.debug(">>EXPUNGED<<%n%n%s", hd.toString());
throw new ExpungedException();
}
}
try {
MessageDigest md = MessageDigest.getInstance("MD5");
md.update(folder.getOid().getBytes("UTF-8"));
md.update(hd.toString().getBytes("UTF-8"));
char[] hexDigest = new char[32];
int i = 0;
for(byte b : md.digest()) {
hexDigest[i++] = Character.forDigit((char)((b & 0xf0) >>> 4), 16);
hexDigest[i++] = Character.forDigit((char)(b & 0x0f), 16);
}
String digest = new String(hexDigest);
String guid = digest.substring(0, 8) +
"-" + digest.substring(8, 12) +
"-" + digest.substring(12, 16) +
"-" + digest.substring(16, 20) +
"-" + digest.substring(20, 32);
rootMessage = findMessage.byId(guid);
if(rootMessage == null) {
rootMessage = new Message(rootLnisEntity);
rootMessage.setOid(guid);
luceneDoc = new Document();
}
else if(rootMessage.getFolder() == folder) {
log.warn(">>REPEATED<<%n%nFolder:%s%n%n%s", folder.getRelativePath(), hd.toString());
throw new RepeatedException();
}
else {
rootMessage = new Message(rootLnisEntity);
rootMessage.setOid(UUID.randomUUID().toString());
luceneDoc = new Document();
}
}
catch(NoSuchAlgorithmException ex) {
}
catch(UnsupportedEncodingException ex) {
}
entityStack.push(rootMessage);
addStoredNotAnalyzedIndexField("message_id", rootMessage.getOid());
log.debug("Mime Parse Event: START MESSAGE\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = rootMessage.dumpPath();
//System.out.println("START MESSAGE");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
else {
parsingRoot = false;
}
for(org.apache.james.mime4j.stream.Field f : hd) {
final String fldName = f.getName();
//
if(f instanceof org.apache.james.mime4j.dom.field.UnstructuredField) {
//**********************************************************
//
// UNSTRUCTURED FIELD
//
// "Subject", "Message-Id", "Resent-Msg-Id",
// "Comments", "Keywords", etc
//
//**********************************************************
org.apache.james.mime4j.dom.field.UnstructuredField uf =
(org.apache.james.mime4j.dom.field.UnstructuredField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(uf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = uf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String value = uf.getValue();
if(value != null) {
if(("Message-ID".equalsIgnoreCase(fldName))
|| ("Resent-Message-ID".equalsIgnoreCase(fldName))) {
Matcher m = Pattern.compile("<([^>]+)>").matcher(value);
if(m.find()) {
field.setText(m.group(1));
}
else {
log.error("%s field does not match msg-id format: %s", fldName, value);
field.setText(value);
}
}
else {
value = decodeEncodedWords(value);
field.setText(value);
}
}
field.setEntity(entityStack.peek());
if(parsingRoot && (value != null)) {
if("subject".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("subject", value);
}
}
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.MimeVersionField) {
//**********************************************************
//
// MIME VERSION FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.MimeVersionField mvf =
(org.apache.james.mime4j.dom.field.MimeVersionField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(mvf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = mvf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String mimeVersion = mvf.getMajorVersion() + "." + mvf.getMinorVersion();
field.setText(mimeVersion);
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentMD5Field) {
//**********************************************************
//
// CONTENT MD5 FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentMD5Field cmd5f =
(org.apache.james.mime4j.dom.field.ContentMD5Field) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(cmd5f.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = cmd5f.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String md5raw = cmd5f.getMD5Raw();
if(md5raw != null) {
field.setText(md5raw);
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentLocationField) {
//**********************************************************
//
// CONTENT LOCATION FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentLocationField clf =
(org.apache.james.mime4j.dom.field.ContentLocationField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(clf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = clf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String location = clf.getLocation();
if(location != null) {
field.setText(location);
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentLengthField) {
//**********************************************************
//
// CONTENT LENGTH FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentLengthField clf =
(org.apache.james.mime4j.dom.field.ContentLengthField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(clf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = clf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String length = Long.toString(clf.getContentLength());
field.setText(length);
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentLanguageField) {
//**********************************************************
//
// CONTENT LANGUAGE FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentLanguageField clf =
(org.apache.james.mime4j.dom.field.ContentLanguageField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(clf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = clf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String languages = null;
for(String language : clf.getLanguages()) {
if(languages == null) {
languages = language;
}
else {
languages += "|" + language;
}
}
if(languages != null) {
field.setText(languages);
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentIdField) {
//**********************************************************
//
// CONTENT ID FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentIdField cidf =
(org.apache.james.mime4j.dom.field.ContentIdField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(cidf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = cidf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String cid = cidf.getId();
if(cid != null) {
Matcher m = Pattern.compile("<([^>]+)>").matcher(cid);
if(m.find()) {
field.setText(m.group(1));
}
else {
log.error("%s field does not match content-id format: %s", fldName, cid);
field.setText(cid);
}
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentDescriptionField) {
//**********************************************************
//
// CONTENT DESCRIPTION FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentDescriptionField cdf =
(org.apache.james.mime4j.dom.field.ContentDescriptionField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(cdf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = cdf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String description = cdf.getDescription();
if(description != null) {
description = decodeEncodedWords(description);
field.setText(description);
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentTransferEncodingField) {
//**********************************************************
//
// CONTENT TRANSFER ENCODING FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentTransferEncodingField ctef =
(org.apache.james.mime4j.dom.field.ContentTransferEncodingField) f;
final UnstructuredField field = new UnstructuredField();
field.setName(fldName);
field.setValid(ctef.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = ctef.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String encoding = ctef.getEncoding();
if(encoding != null) {
field.setText(encoding);
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("UNSTRUCTURED FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentTypeField) {
//**********************************************************
//
// CONTENT TYPE FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentTypeField ctf =
(org.apache.james.mime4j.dom.field.ContentTypeField) f;
final ContentTypeField field = new ContentTypeField();
field.setName(fldName);
field.setValid(ctf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = ctf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String mimeType = ctf.getMimeType();
if(mimeType != null) {
String[] mimeTypeSplit = mimeType.split("/");
field.setMediaType(mimeTypeSplit[0]);
field.setSubType(mimeTypeSplit[1]);
}
for(Entry entry : ctf.getParameters().entrySet()) {
String key = entry.getKey();
String value = entry.getValue();
if((key != null) && (!key.isEmpty()) && (value != null) && (!value.isEmpty())) {
value = decodeEncodedWords(value);
field.addParameter(key, value);
}
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: CONTENT TYPE FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("CONTENT TYPE FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.ContentDispositionField) {
//**********************************************************
//
// CONTENT DISPOSITION FIELD
//
//**********************************************************
org.apache.james.mime4j.dom.field.ContentDispositionField cdf =
(org.apache.james.mime4j.dom.field.ContentDispositionField) f;
final ContentDispositionField field = new ContentDispositionField();
field.setName(fldName);
field.setValid(cdf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = cdf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
String dispositionType = cdf.getDispositionType();
if(dispositionType != null) {
field.setDispositionType(dispositionType);
}
for(Entry entry : cdf.getParameters().entrySet()) {
String key = entry.getKey();
String value = entry.getValue();
if((key != null) && (!key.isEmpty()) && (value != null) && (!value.isEmpty())) {
value = decodeEncodedWords(value);
field.addParameter(key, value);
}
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: CONTENT DISPOSITION FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("CONTENT DISPOSITION FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.DateTimeField) {
//**********************************************************
//
// DATE TIME FIELD
//
// "Date", "Resent-Date"
//
//**********************************************************
org.apache.james.mime4j.dom.field.DateTimeField dtf =
(org.apache.james.mime4j.dom.field.DateTimeField) f;
final DateTimeField field = new DateTimeField();
field.setName(fldName);
field.setValid(dtf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = dtf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
Date date = dtf.getDate();
if(date != null) {
field.setDate(date);
}
field.setEntity(entityStack.peek());
if(parsingRoot && (date != null)) {
if("date".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("date", DateTools.dateToString(date, Resolution.SECOND));
}
}
log.debug("Mime Parse Event: DATE TIME FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("DATE TIME FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.AddressListField) {
//**********************************************************
//
// ADDRESS LIST FIELD
//
// "To", "Cc", "Bcc", "Reply-To",
// "Resent-To", "Resent-Cc", "Resent-Bcc"
//
//**********************************************************
org.apache.james.mime4j.dom.field.AddressListField alf =
(org.apache.james.mime4j.dom.field.AddressListField) f;
final AddressListField field = new AddressListField();
field.setName(fldName);
field.setValid(alf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = alf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: ADDRESS LIST FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("ADDRESS LIST FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
for(org.apache.james.mime4j.dom.address.Address a : alf.getAddressList()) {
if(a instanceof org.apache.james.mime4j.dom.address.Mailbox) {
//**************************************************
//
// address list field MAILBOX
//
//**************************************************
org.apache.james.mime4j.dom.address.Mailbox m =
(org.apache.james.mime4j.dom.address.Mailbox) a;
final AddressListField_Mailbox mailbox = new AddressListField_Mailbox();
String mname = m.getName();
if(mname != null) {
mname = decodeEncodedWords(mname);
mailbox.setName(mname);
}
String localPart = m.getLocalPart();
if(localPart != null) {
mailbox.setLocalPart(localPart);
}
String domain = m.getDomain();
if(domain != null) {
mailbox.setDomain(domain);
}
org.apache.james.mime4j.dom.address.DomainList dl = m.getRoute();
if(dl.size() > 0) {
mailbox.setRoute(dl.toRouteString());
}
mailbox.setField(field);
if(parsingRoot && (mname != null)) {
if("to".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("to", mname);
}
else if("cc".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("cc", mname);
}
else if("bcc".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("bcc", mname);
}
}
if(parsingRoot && (localPart != null) && (domain != null)) {
String mbox = localPart + "@" + domain;
if("to".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("to_mbox", mbox);
}
else if("cc".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("cc_mbox", mbox);
}
else if("bcc".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("bcc_mbox", mbox);
}
}
log.debug("Mime Parse Event: ADDRESS LIST FIELD MAILBOX\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = mailbox.dumpPath();
//System.out.println("ADDRESS LIST FIELD MAILBOX");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
else if(a instanceof org.apache.james.mime4j.dom.address.Group) {
//**************************************************
//
// address list field GROUP
//
//**************************************************
org.apache.james.mime4j.dom.address.Group g =
(org.apache.james.mime4j.dom.address.Group) a;
final AddressListField_Group group = new AddressListField_Group();
String gname = g.getName();
if(gname != null) {
gname = decodeEncodedWords(gname);
group.setName(gname);
}
group.setField(field);
if(parsingRoot && (gname != null)) {
if("to".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("to", gname);
}
else if("cc".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("cc", gname);
}
else if("bcc".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("bcc", gname);
}
}
log.debug("Mime Parse Event: GROUP\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = group.dumpPath();
//System.out.println("GROUP");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
for(org.apache.james.mime4j.dom.address.Mailbox m : g.getMailboxes()) {
//**********************************************
//
// address list field GROUP MAILBOX
//
//**********************************************
final AddressListField_Group_Mailbox mailbox = new AddressListField_Group_Mailbox();
String mname = m.getName();
if(mname != null) {
mname = decodeEncodedWords(mname);
mailbox.setName(mname);
}
String localPart = m.getLocalPart();
if(localPart != null) {
mailbox.setLocalPart(localPart);
}
String domain = m.getDomain();
if(domain != null) {
mailbox.setDomain(domain);
}
org.apache.james.mime4j.dom.address.DomainList dl = m.getRoute();
if(dl.size() > 0) {
mailbox.setRoute(dl.toRouteString());
}
mailbox.setGroup(group);
if(parsingRoot && (mname != null)) {
if("to".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("to", mname);
}
else if("cc".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("cc", mname);
}
else if("bcc".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("bcc", mname);
}
}
if(parsingRoot && (localPart != null) && (domain != null)) {
String mbox = localPart + "@" + domain;
if("to".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("to_mbox", mbox);
}
else if("cc".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("cc_mbox", mbox);
}
else if("bcc".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("bcc_mbox", mbox);
}
}
log.debug("Mime Parse Event: GROUP MAILBOX\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = mailbox.dumpPath();
//System.out.println("GROUP MAILBOX");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
}
}
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.MailboxListField) {
//**********************************************************
//
// MAILBOX LIST FIELD
//
// "From", "Resent-From"
//
//**********************************************************
org.apache.james.mime4j.dom.field.MailboxListField mlf =
(org.apache.james.mime4j.dom.field.MailboxListField) f;
final MailboxListField field = new MailboxListField();
field.setName(fldName);
field.setValid(mlf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = mlf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: MAILBOX LIST FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("MAILBOX LIST FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
for(org.apache.james.mime4j.dom.address.Mailbox m : mlf.getMailboxList()) {
//******************************************************
//
// mailbox list field MAILBOX
//
//******************************************************
final MailboxListField_Mailbox mailbox = new MailboxListField_Mailbox();
String mname = m.getName();
if(mname != null) {
mname = decodeEncodedWords(mname);
mailbox.setName(mname);
}
String localPart = m.getLocalPart();
if(localPart != null) {
mailbox.setLocalPart(localPart);
}
String domain = m.getDomain();
if(domain != null) {
mailbox.setDomain(domain);
}
org.apache.james.mime4j.dom.address.DomainList dl = m.getRoute();
if(dl.size() > 0) {
mailbox.setRoute(dl.toRouteString());
}
mailbox.setField(field);
if(parsingRoot && (mname != null)) {
if("from".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("from", mname);
}
}
if(parsingRoot && (localPart != null) && (domain != null)) {
String mbox = localPart + "@" + domain;
if("from".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("from_mbox", mbox);
}
}
log.debug("Mime Parse Event: MAILBOX LIST FIELD MAILBOX\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = mailbox.dumpPath();
//System.out.println("MAILBOX LIST FIELD MAILBOX");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
}
//
//
else if(f instanceof org.apache.james.mime4j.dom.field.MailboxField) {
//**********************************************************
//
// MAILBOX FIELD
//
// "Sender", "Resent-Sender"
//
//**********************************************************
org.apache.james.mime4j.dom.field.MailboxField mf =
(org.apache.james.mime4j.dom.field.MailboxField) f;
final MailboxField field = new MailboxField();
field.setName(fldName);
field.setValid(mf.isValidField());
if(!field.isValid()) {
org.apache.james.mime4j.dom.field.ParseException e = mf.getParseException();
if(e != null) {
field.addParseExceptionStackTrace(e);
}
}
field.setEntity(entityStack.peek());
log.debug("Mime Parse Event: MAILBOX FIELD\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = field.dumpPath();
//System.out.println("MAILBOX FIELD");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
//**********************************************************
//
// mailbox field MAILBOX
//
//**********************************************************
org.apache.james.mime4j.dom.address.Mailbox m = mf.getMailbox();
final MailboxField_Mailbox mailbox = new MailboxField_Mailbox();
String mname = m.getName();
if(mname != null) {
mname = decodeEncodedWords(mname);
mailbox.setName(mname);
}
String localPart = m.getLocalPart();
if(localPart != null) {
mailbox.setLocalPart(localPart);
}
String domain = m.getDomain();
if(domain != null) {
mailbox.setDomain(domain);
}
org.apache.james.mime4j.dom.address.DomainList dl = m.getRoute();
if(dl.size() > 0) {
mailbox.setRoute(dl.toRouteString());
}
mailbox.setField(field);
if(parsingRoot && (mname != null)) {
if("sender".equalsIgnoreCase(fldName)) {
addAnalyzedIndexField("sender", mname);
}
}
if(parsingRoot && (localPart != null) && (domain != null)) {
String mbox = localPart + "@" + domain;
if("sender".equalsIgnoreCase(fldName)) {
addNotAnalyzedIndexField("sender_mbox", mbox);
}
}
log.debug("Mime Parse Event: MAILBOX FIELD MAILBOX\n\n%s", new Supplier() {
@Override
public Object[] get() {
String domPath = mailbox.dumpPath();
//System.out.println("MAILBOX FIELD MAILBOX");
//System.out.println(domPath);
return new Object[] { domPath };
}
});
}
//
}
}
/**
 * Handles a single (leaf) body reported by the MIME parser.
 *
 * <p>A {@code TextBody} is created when the media type is "text", a
 * {@code BinaryBody} otherwise. The new body receives a random UUID as oid
 * and is attached to the entity currently on top of {@code entityStack}.
 * The stream is read to its end to count the bytes; that count is stored on
 * the body and added to the owning entity through {@code incSize}.
 *
 * <p>For text bodies the bytes are also buffered and decoded with the charset
 * named by the descriptor (Windows-1252 when the lookup returns nothing).
 * HTML is reduced to plain text with {@code TextExtractor}; for any other
 * subtype, runs of line breaks are collapsed into a single space. The
 * resulting text is indexed under "body" and its first 300 chars (at most)
 * become the preview.
 *
 * <p>An {@code IOException} raised while reading is logged and not
 * rethrown; whatever was read before the failure is still processed.
 *
 * @param bd descriptor supplying media type, subtype and charset of the body
 * @param is stream with the body content
 */
@Override
public void body(BodyDescriptor bd, InputStream is) {

    final boolean isText = bd.getMediaType().equalsIgnoreCase("text");

    final SingleBody singleBody = isText ? new TextBody() : new BinaryBody();
    singleBody.setOid(UUID.randomUUID().toString());
    singleBody.setEntity(entityStack.peek());

    int size = 0;
    int capacity = 0x10000;

    // Only text bodies are kept in memory; binary bodies are merely counted.
    byte[] textBuf = isText ? new byte[capacity] : null;

    try {
        final byte[] buf = new byte[0x1000];
        int n;
        while((n = is.read(buf)) >= 0) {
            if(isText) {
                // A chunk is never larger than the growth step, so one
                // enlargement always makes room for it.
                if((size + n) >= capacity) {
                    capacity += 0x10000;
                    textBuf = Arrays.copyOf(textBuf, capacity);
                }
                System.arraycopy(buf, 0, textBuf, size, n);
            }
            size += n;
        }
    }
    catch(IOException ex) {
        // Best effort: keep whatever could be read before the failure.
        log.error(ex);
    }

    singleBody.setSize(size);
    singleBody.getEntity().incSize(size);

    if(isText) {

        Charset cs = CharsetUtil.lookup(bd.getCharset());
        if(cs == null) {
            cs = Charsets.Windows_1252;
        }

        String text = new String(textBuf, 0, size, cs);

        String textExt;

        if(bd.getSubType().equalsIgnoreCase("html")) {
            Source html = new Source(text);
            html.fullSequentialParse();
            Element body = html.getFirstElement("body");
            // Prefer the <body> element; fall back to the whole document.
            TextExtractor textExtractor = (body != null)
                    ? new TextExtractor(body)
                    : new TextExtractor(html);
            textExt = textExtractor.toString();
        }
        else {
            textExt = text.replaceAll("(" + CRLF + "|" + CR + "|" + LF + ")+", " ");
        }

        // Cut the preview at 300 chars, but never between the two halves of
        // a surrogate pair: that would leave an unpaired high surrogate at
        // the end of the stored preview.
        int previewEnd = Math.min(textExt.length(), 300);
        if((previewEnd < textExt.length())
                && Character.isHighSurrogate(textExt.charAt(previewEnd - 1))
                && Character.isLowSurrogate(textExt.charAt(previewEnd))) {
            previewEnd--;
        }
        String preview = textExt.substring(0, previewEnd);

        ((TextBody)singleBody).setPreview(preview);

        addAnalyzedIndexField("body", textExt);
    }

    log.debug("Mime Parse Event: SINGLE BODY\n\n%s", new Supplier() {
        @Override
        public Object[] get() {
            // Evaluated only if the logger asks for the arguments.
            return new Object[] { singleBody.dumpPath() };
        }
    });
}
/**
 * Parser callback for the end of a body part: removes the top entry of
 * {@code entityStack} (cast to {@code BodyPart}) and logs its path dump
 * at debug level.
 */
@Override
public void endBodyPart() {

    final BodyPart finishedPart = (BodyPart) entityStack.pop();

    log.debug("Mime Parse Event: END BODY PART\n\n%s", new Supplier() {
        @Override
        public Object[] get() {
            return new Object[] { finishedPart.dumpPath() };
        }
    });
}
/**
 * Parser callback for a multipart epilogue: hands the epilogue stream to
 * the multipart currently on top of {@code multipartStack}.
 *
 * @param is stream carrying the epilogue content
 */
@Override
public void epilogue(InputStream is) {
    multipartStack.peek().setEpilogueStream(is);
}
/**
 * Parser callback for the end of a multipart: removes the top entry of
 * {@code multipartStack} and logs its path dump at debug level.
 */
@Override
public void endMultipart() {

    final Multipart finishedMultipart = multipartStack.pop();

    log.debug("Mime Parse Event: END MULTIPART\n\n%s", new Supplier() {
        @Override
        public Object[] get() {
            return new Object[] { finishedMultipart.dumpPath() };
        }
    });
}
/**
 * Parser callback for the end of a message: removes the top entry of
 * {@code entityStack} and logs its path dump at debug level. The log
 * label depends on whether the popped entity is an
 * {@code EmbeddedMessage} (checked first) or a {@code Message}; any other
 * entity type is popped without logging.
 */
@Override
public void endMessage() {

    final Entity finished = entityStack.pop();

    // The embedded-message check must come first, as in the original chain.
    if(finished instanceof EmbeddedMessage) {
        final EmbeddedMessage embedded = (EmbeddedMessage) finished;
        log.debug("Mime Parse Event: END EMBEDDED MESSAGE\n\n%s", new Supplier() {
            @Override
            public Object[] get() {
                return new Object[] { embedded.dumpPath() };
            }
        });
        return;
    }

    if(finished instanceof Message) {
        final Message root = (Message) finished;
        log.debug("Mime Parse Event: END MESSAGE\n\n%s", new Supplier() {
            @Override
            public Object[] get() {
                return new Object[] { root.dumpPath() };
            }
        });
    }
}
}
}