package pkg.crawler.data.conn;

import java.io.IOException;
import java.io.ObjectOutputStream;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;

import javax.mail.Address;
import javax.mail.BodyPart;
import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

import pkg.crawler.entity.Message2SolrEntity;

import com.sun.mail.imap.ACL;
import com.sun.mail.imap.IMAPFolder;
import com.sun.mail.imap.IMAPMessage;
import com.sun.mail.imap.IMAPStore;
import com.sun.mail.imap.Rights;
import com.sun.mail.imap.Rights.Right;

/**
 * Reads the messages of one IMAP folder and of its sub-folders and sends one
 * Solr document per message to the configured Solr server.
 *
 * <p>Before a folder is read, a read-only ACL entry for {@value #ACL_USER} is
 * added to it; the entry is removed again once the folder has been processed.
 * When {@link #run()} finishes (successfully or not) the name of the crawled
 * folder is written to a socket on {@value #NOTIFY_HOST}:{@value #NOTIFY_PORT}.
 *
 * <p>An instance is single-use: {@link #run()} closes the IMAP store it
 * opened in the constructor.
 */
public class Crawler {

    /** Identifier the temporary read ACL is granted to. */
    private static final String ACL_USER = "expresso-admin";

    /** A Solr commit is issued every time this many messages were processed. */
    private static final int COMMIT_BATCH_SIZE = 200;

    /** Endpoint that is told when a crawl has ended. */
    private static final String NOTIFY_HOST = "127.1.1.1";
    private static final int NOTIFY_PORT = 8090;

    private final String hostSolr;
    private final ACL acl;
    private final IMAPFolder imapFINBOXAux;
    private final IMAPStore store;
    private final String strIMAPFolderAux;

    /**
     * Connects to the IMAP server and prepares the read-only ACL entry.
     *
     * @param host          IMAP server host
     * @param user          IMAP login
     * @param password      IMAP password
     * @param imapFINBOXAux full name of the folder to crawl
     * @param hostSolr      base URL of the Solr server
     * @throws MessagingException if the connection or folder lookup fails
     */
    public Crawler(String host, String user, String password, String imapFINBOXAux, String hostSolr)
            throws MessagingException {
        Properties props = System.getProperties();
        Session session = Session.getInstance(props, null);

        this.strIMAPFolderAux = imapFINBOXAux;
        this.store = new IMAPStore(session, null);
        this.store.connect(host, user, password);
        this.hostSolr = hostSolr;
        this.imapFINBOXAux = (IMAPFolder) store.getFolder(imapFINBOXAux);

        // Build the ACL entry that grants read access only.
        this.acl = new ACL(ACL_USER);
        Rights rights = new Rights();
        rights.add(Right.READ);
        this.acl.setRights(rights);
    }

    /**
     * Indexes the configured folder, then every non-shared direct sub-folder,
     * and finally sends the completion notification.
     *
     * <p>Failures are logged to {@code System.err}; the ACL entry, the folder,
     * the Solr client and the IMAP store are released in all cases and the
     * notification is always attempted.
     */
    public void run() {
        SolrServer solrServer = null;
        boolean aclGranted = false;
        try {
            solrServer = new HttpSolrServer(hostSolr);

            // Grant read permission so the messages can be fetched.
            imapFINBOXAux.addACL(acl);
            aclGranted = true;

            imapFINBOXAux.open(Folder.READ_ONLY);
            indexMessages(imapFINBOXAux, solrServer);

            imapFINBOXAux.removeACL(ACL_USER);
            aclGranted = false;

            for (Folder child : imapFINBOXAux.list()) {
                // Shared folders are skipped.
                if (isPrivateUserFolder(child.getFullName())) {
                    crawIntoUserFolders((IMAPFolder) child);
                }
            }
        } catch (Exception e) {
            // The crawl is abandoned, but the reason must not be lost.
            System.err.println("crawler -> " + e.getMessage());
            e.printStackTrace();
        } finally {
            if (aclGranted) {
                revokeAclQuietly(imapFINBOXAux);
            }
            closeFolderQuietly(imapFINBOXAux);
            if (solrServer != null) {
                solrServer.shutdown();
            }
            closeStoreQuietly();
            notifyFinished();
        }
    }

    /**
     * Indexes every message of one sub-folder.
     *
     * <p>If the read ACL cannot be added the folder is skipped. Any other
     * failure propagates to the caller after the ACL entry has been revoked
     * and the folder and Solr client have been released.
     *
     * @param imapFAux the sub-folder to index
     * @throws Exception if opening, reading or committing fails
     */
    private void crawIntoUserFolders(IMAPFolder imapFAux) throws Exception {
        try {
            imapFAux.addACL(acl);
        } catch (Exception e) {
            // Best effort: a folder we cannot get access to is skipped.
            System.err.println("acl -> " + imapFAux.getFullName() + ": " + e.getMessage());
            return;
        }

        SolrServer solrServer = null;
        try {
            solrServer = new HttpSolrServer(hostSolr);
            imapFAux.open(Folder.READ_ONLY);
            indexMessages(imapFAux, solrServer);
            System.out.println(imapFAux.getFullName());
        } finally {
            revokeAclQuietly(imapFAux);
            closeFolderQuietly(imapFAux);
            if (solrServer != null) {
                solrServer.shutdown();
            }
        }
    }

    /**
     * Sends one Solr document per readable message of an already opened folder,
     * committing every {@link #COMMIT_BATCH_SIZE} messages and once at the end.
     */
    private void indexMessages(IMAPFolder folder, SolrServer solrServer) throws Exception {
        List<Message2SolrEntity> parsed = new ArrayList<Message2SolrEntity>(1);
        int processed = 0;

        for (Message message : folder.getMessages()) {
            parsed.clear();
            try {
                dumpPart((IMAPMessage) message, parsed);
            } catch (Exception e) {
                // Best effort: one unreadable message must not stop the folder.
                System.err.println("imap -> " + e.getMessage());
                continue;
            }
            if (parsed.isEmpty()) {
                // dumpPart ignores messages without a Message-ID.
                continue;
            }

            try {
                solrServer.add(toSolrDocument(parsed.get(0)));
            } catch (Exception e) {
                System.err.println("solr -> " + e.getMessage());
            }

            processed++;
            if (processed % COMMIT_BATCH_SIZE == 0) {
                solrServer.commit();
            }
        }
        solrServer.commit();
    }

    /** Copies every non-null attribute of the entity into a new Solr document. */
    private static SolrInputDocument toSolrDocument(Message2SolrEntity entity) {
        SolrInputDocument doc = new SolrInputDocument();
        addIfPresent(doc, "id", entity.getId());
        addIfPresent(doc, "user", entity.getUser());
        addIfPresent(doc, "folder", entity.getFolder());
        addIfPresent(doc, "msg_no", entity.getMsgNo());
        addIfPresent(doc, "from", entity.getFrom());
        addIfPresent(doc, "to", entity.getTo());
        addIfPresent(doc, "subject", entity.getSubject());
        addIfPresent(doc, "content", entity.getContent());
        addIfPresent(doc, "copyto", entity.getCopyto());
        addIfPresent(doc, "sent_date", entity.getSent_date());
        addIfPresent(doc, "hiddencopyto", entity.getHiddencopyto());
        return doc;
    }

    /** Adds the value's string form to the document; null values are left out. */
    private static void addIfPresent(SolrInputDocument doc, String field, Object value) {
        if (value != null) {
            doc.addField(field, value.toString());
        }
    }

    /**
     * Tells whether a folder path has at most three {@code /}-separated
     * segments and its third segment (if any) is not {@code "user"}.
     */
    private static boolean isPrivateUserFolder(String fullName) {
        String[] segments = fullName.split("/");
        if (segments.length > 3) {
            return false;
        }
        // Guarded: paths shorter than three segments have no third segment to test.
        return segments.length < 3 || !"user".equals(segments[2]);
    }

    /** Removes the temporary ACL entry, logging instead of throwing on failure. */
    private static void revokeAclQuietly(IMAPFolder folder) {
        try {
            folder.removeACL(ACL_USER);
        } catch (Exception e) {
            System.err.println("acl -> " + e.getMessage());
        }
    }

    /** Closes the folder if it is open, logging instead of throwing on failure. */
    private static void closeFolderQuietly(Folder folder) {
        if (folder == null || !folder.isOpen()) {
            return;
        }
        try {
            folder.close(true);
        } catch (Exception e) {
            System.err.println("imap -> " + e.getMessage());
        }
    }

    /** Closes the IMAP store opened by the constructor. */
    private void closeStoreQuietly() {
        try {
            store.close();
        } catch (Exception e) {
            System.err.println("imap -> " + e.getMessage());
        }
    }

    /** Writes the crawled folder name to the notification socket. */
    private void notifyFinished() {
        Socket client = null;
        ObjectOutputStream oos = null;
        try {
            client = new Socket(NOTIFY_HOST, NOTIFY_PORT);
            oos = new ObjectOutputStream(client.getOutputStream());
            oos.writeObject(strIMAPFolderAux);
            oos.flush();
        } catch (UnknownHostException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (oos != null) {
                try {
                    oos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (client != null) {
                try {
                    client.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Converts one IMAP message into a {@link Message2SolrEntity} and appends it
     * to the given list. Messages without a Message-ID are ignored (nothing is
     * appended).
     *
     * @param m            the message to convert
     * @param listIMAPMsgs receives the converted message
     * @throws Exception if a header cannot be read
     */
    private void dumpPart(IMAPMessage m, List<Message2SolrEntity> listIMAPMsgs) throws Exception {
        // A message identifier is required to continue.
        String messageId = m.getMessageID();
        if (messageId == null || messageId.equals("")) {
            return;
        }

        Message2SolrEntity msgEntity = new Message2SolrEntity();
        msgEntity.setId(new StringBuilder(messageId));

        // The second path segment is used as the user, the last one as the folder.
        String[] path = m.getFolder().getFullName().split("/");
        String user = path[1];
        String folder = path[path.length - 1];
        msgEntity.setUser(new StringBuilder(user));
        if (!user.trim().equals(folder.trim())) {
            msgEntity.setFolder(new StringBuilder(folder));
        } else {
            msgEntity.setFolder(new StringBuilder("INBOX"));
        }

        msgEntity.setMsgNo(new StringBuilder(String.valueOf(m.getMessageNumber())));

        if (m.getSubject() != null) {
            msgEntity.setSubject(new StringBuilder(m.getSubject()));
        }

        // FROM
        StringBuilder from = joinAddresses(m.getFrom(), ", ");
        if (from != null) {
            msgEntity.setFrom(from);
        }

        // TO
        StringBuilder to = joinAddresses(m.getRecipients(Message.RecipientType.TO), ", ");
        if (to != null) {
            msgEntity.setTo(to);
        }

        // CC
        StringBuilder cc = joinAddresses(m.getRecipients(Message.RecipientType.CC), ", ");
        if (cc != null) {
            msgEntity.setCopyto(cc);
        }

        // BCC -- joined with "," (no space), unlike the other address fields;
        // kept as is so that indexed values do not change.
        StringBuilder bcc = joinAddresses(m.getRecipients(Message.RecipientType.BCC), ",");
        if (bcc != null) {
            msgEntity.setHiddencopyto(bcc);
        }

        // DATE -- the current time is used when the message has no sent date.
        Date sent = m.getSentDate();
        long sentMillis = (sent != null) ? sent.getTime() : new Date().getTime();
        msgEntity.setSent_date(new StringBuilder(String.valueOf(sentMillis)));

        try {
            Object content = m.getContent();
            if (content instanceof String) {
                msgEntity.setContent(new StringBuilder(content.toString()));
            } else if (content instanceof Multipart) {
                Multipart multipart = (Multipart) content;
                int count = multipart.getCount();
                for (int i = 0; i < count; i++) {
                    if (msgEntity.getContent() == null
                            || msgEntity.getContent().toString().trim().equals("")) {
                        msgEntity.setContent(new StringBuilder(""));
                    }
                    BodyPart part = multipart.getBodyPart(i);
                    // Attachments are identified by their disposition, not their content type.
                    if (!Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
                        msgEntity.setContent(
                                msgEntity.getContent().append(" ").append(getPlainContent(part)));
                    }
                }
            }
        } catch (Exception e) {
            // The message is still indexed, just without (complete) content.
            System.out.println("Exception ocurred!");
            e.printStackTrace();
        }

        if (msgEntity.getId() != null) {
            listIMAPMsgs.add(msgEntity);
        }
    }

    /**
     * Joins the string forms of the addresses with the given separator.
     *
     * @return the joined text, or {@code null} if the array is null or empty
     */
    private static StringBuilder joinAddresses(Address[] addresses, String separator) {
        if (addresses == null || addresses.length == 0) {
            return null;
        }
        StringBuilder joined = new StringBuilder(addresses[0].toString());
        for (int i = 1; i < addresses.length; i++) {
            joined.append(separator).append(addresses[i].toString());
        }
        return joined;
    }

    /**
     * Returns the body part's content when it is a {@code String}; an empty
     * builder otherwise, including when reading the content fails.
     */
    private StringBuilder getPlainContent(BodyPart bodyPart) throws IOException, MessagingException {
        try {
            Object content = bodyPart.getContent();
            if (content instanceof String) {
                return new StringBuilder(content.toString());
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
            e.printStackTrace();
        }
        return new StringBuilder("");
    }
}