001/****************************************************************
002 * Licensed to the Apache Software Foundation (ASF) under one   *
003 * or more contributor license agreements.  See the NOTICE file *
004 * distributed with this work for additional information        *
005 * regarding copyright ownership.  The ASF licenses this file   *
006 * to you under the Apache License, Version 2.0 (the            *
007 * "License"); you may not use this file except in compliance   *
008 * with the License.  You may obtain a copy of the License at   *
009 *                                                              *
010 *   http://www.apache.org/licenses/LICENSE-2.0                 *
011 *                                                              *
012 * Unless required by applicable law or agreed to in writing,   *
013 * software distributed under the License is distributed on an  *
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015 * KIND, either express or implied.  See the License for the    *
016 * specific language governing permissions and limitations      *
017 * under the License.                                           *
018 ****************************************************************/
019
020package org.apache.james.mime4j.field;
021
022import org.apache.james.mime4j.MimeException;
023import org.apache.james.mime4j.codec.DecodeMonitor;
024import org.apache.james.mime4j.dom.FieldParser;
025import org.apache.james.mime4j.dom.field.AddressListField;
026import org.apache.james.mime4j.dom.field.ContentDescriptionField;
027import org.apache.james.mime4j.dom.field.ContentDispositionField;
028import org.apache.james.mime4j.dom.field.ContentIdField;
029import org.apache.james.mime4j.dom.field.ContentLanguageField;
030import org.apache.james.mime4j.dom.field.ContentLengthField;
031import org.apache.james.mime4j.dom.field.ContentLocationField;
032import org.apache.james.mime4j.dom.field.ContentMD5Field;
033import org.apache.james.mime4j.dom.field.ContentTransferEncodingField;
034import org.apache.james.mime4j.dom.field.ContentTypeField;
035import org.apache.james.mime4j.dom.field.DateTimeField;
036import org.apache.james.mime4j.dom.field.FieldName;
037import org.apache.james.mime4j.dom.field.MailboxField;
038import org.apache.james.mime4j.dom.field.MailboxListField;
039import org.apache.james.mime4j.dom.field.MimeVersionField;
040import org.apache.james.mime4j.dom.field.ParsedField;
041import org.apache.james.mime4j.dom.field.UnstructuredField;
042import org.apache.james.mime4j.stream.Field;
043import org.apache.james.mime4j.stream.RawField;
044import org.apache.james.mime4j.stream.RawFieldParser;
045import org.apache.james.mime4j.util.ByteSequence;
046import org.apache.james.mime4j.util.ContentUtil;
047
048/**
049 * Lenient implementation of the {@link FieldParser} interface with a high degree of tolerance
050 * to non-severe MIME field format violations. 
051 */
052public class LenientFieldParser extends DelegatingFieldParser {
053
054    private static final FieldParser<ParsedField> PARSER = new LenientFieldParser();
055
056    /**
057     * Gets the default instance of this class.
058     *
059     * @return the default instance
060     */
061    public static FieldParser<ParsedField> getParser() {
062        return PARSER;
063    }
064
065    /**
066     * Parses the given byte sequence and returns an instance of the {@link ParsedField} class. 
067     * The type of the class returned depends on the field name; see {@link #parse(String)} for 
068     * a table of field names and their corresponding classes.
069     *
070     * @param raw the bytes to parse.
071     * @param monitor decoding monitor used while parsing/decoding.
072     * @return a parsed field.
073     * @throws MimeException if the raw string cannot be split into field name and body.
074     */
075    public static ParsedField parse(
076            final ByteSequence raw,
077            final DecodeMonitor monitor) throws MimeException {
078        Field rawField = RawFieldParser.DEFAULT.parseField(raw);
079        return PARSER.parse(rawField, monitor);
080    }
081
082    /**
083     * Parses the given string and returns an instance of the <code>Field</code> class.
084     * The type of the class returned depends on the field name.
085     *
086     * @param rawStr the string to parse.
087     * @param monitor a DecodeMonitor object used while parsing/decoding.
088     * @return a <code>ParsedField</code> instance.
089     * @throws MimeException if the raw string cannot be split into field name and body.
090     */
091    public static ParsedField parse(
092            final String rawStr,
093            final DecodeMonitor monitor) throws MimeException {
094        ByteSequence raw = ContentUtil.encode(rawStr);
095        RawField rawField = RawFieldParser.DEFAULT.parseField(raw);
096        // Do not retain the original raw representation as the field
097        // may require folding
098        return PARSER.parse(rawField, monitor);
099    }
100
101    /**
102     * Parses the given string and returns an instance of the {@link ParsedField} class. 
103     * The type of the class returned depends on the field name:
104     * <p>
105     * <table>
106     *   <tr><th>Class returned</th><th>Field names</th></tr>
107     *   <tr><td>{@link ContentTypeField}</td><td>Content-Type</td></tr>
108     *   <tr><td>{@link ContentLengthField}</td><td>Content-Length</td></tr>
109     *   <tr><td>{@link ContentTransferEncodingField}</td><td>Content-Transfer-Encoding</td></tr>
110     *   <tr><td>{@link ContentDispositionField}</td><td>Content-Disposition</td></tr>
111     *   <tr><td>{@link ContentDescriptionField}</td><td>Content-Description</td></tr>
112     *   <tr><td>{@link ContentIdField}</td><td>Content-ID</td></tr>
113     *   <tr><td>{@link ContentMD5Field}</td><td>Content-MD5</td></tr>
114     *   <tr><td>{@link ContentLanguageField}</td><td>Content-Language</td></tr>
115     *   <tr><td>{@link ContentLocationField}</td><td>Content-Location</td></tr>
116     *   <tr><td>{@link MimeVersionField}</td><td>MIME-Version</td></tr>
117     *   <tr><td>{@link DateTimeField}</td><td>Date, Resent-Date</td></tr>
118     *   <tr><td>{@link MailboxField}</td><td>Sender, Resent-Sender</td></tr>
119     *   <tr><td>{@link MailboxListField}</td><td>From, Resent-From</td></tr>
120     *   <tr><td>{@link AddressListField}</td><td>To, Cc, Bcc, Reply-To, Resent-To, Resent-Cc, Resent-Bcc</td></tr>
121     *   <tr><td>{@link UnstructuredField}</td><td>Subject and others</td></tr>
122     * </table>
123     *
124     * @param rawStr the string to parse.
125     * @return a parsed field.
126     * @throws MimeException if the raw string cannot be split into field name and body.
127     */
128    public static ParsedField parse(final String rawStr) throws MimeException {
129        return parse(rawStr, DecodeMonitor.SILENT);
130    }
131
132    public LenientFieldParser() {
133        super(UnstructuredFieldImpl.PARSER);
134        setFieldParser(FieldName.CONTENT_TYPE,
135                ContentTypeFieldLenientImpl.PARSER);        // lenient
136        setFieldParser(FieldName.CONTENT_LENGTH,
137                ContentLengthFieldImpl.PARSER);             // default
138        setFieldParser(FieldName.CONTENT_TRANSFER_ENCODING,
139                ContentTransferEncodingFieldImpl.PARSER);   // default
140        setFieldParser(FieldName.CONTENT_DISPOSITION,
141                ContentDispositionFieldLenientImpl.PARSER); // lenient
142        setFieldParser(FieldName.CONTENT_ID,
143                ContentIdFieldImpl.PARSER);                 // default
144        setFieldParser(FieldName.CONTENT_MD5,
145                ContentMD5FieldImpl.PARSER);                // default
146        setFieldParser(FieldName.CONTENT_DESCRIPTION,
147                ContentDescriptionFieldImpl.PARSER);        // default
148        setFieldParser(FieldName.CONTENT_LANGUAGE,
149                ContentLanguageFieldLenientImpl.PARSER);    // lenient
150        setFieldParser(FieldName.CONTENT_LOCATION,
151                ContentLocationFieldLenientImpl.PARSER);    // lenient
152        setFieldParser(FieldName.MIME_VERSION,
153                MimeVersionFieldImpl.PARSER);               // lenient
154
155        FieldParser<DateTimeField> dateTimeParser = DateTimeFieldLenientImpl.PARSER;
156        setFieldParser(FieldName.DATE, dateTimeParser);
157        setFieldParser(FieldName.RESENT_DATE, dateTimeParser);
158
159        FieldParser<MailboxListField> mailboxListParser = MailboxListFieldLenientImpl.PARSER;
160        setFieldParser(FieldName.FROM, mailboxListParser);
161        setFieldParser(FieldName.RESENT_FROM, mailboxListParser);
162
163        FieldParser<MailboxField> mailboxParser = MailboxFieldLenientImpl.PARSER;
164        setFieldParser(FieldName.SENDER, mailboxParser);
165        setFieldParser(FieldName.RESENT_SENDER, mailboxParser);
166
167        FieldParser<AddressListField> addressListParser = AddressListFieldLenientImpl.PARSER;
168        setFieldParser(FieldName.TO, addressListParser);
169        setFieldParser(FieldName.RESENT_TO, addressListParser);
170        setFieldParser(FieldName.CC, addressListParser);
171        setFieldParser(FieldName.RESENT_CC, addressListParser);
172        setFieldParser(FieldName.BCC, addressListParser);
173        setFieldParser(FieldName.RESENT_BCC, addressListParser);
174        setFieldParser(FieldName.REPLY_TO, addressListParser);
175    }
176
177}