001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019/**
020 * Operations on {@link CharSequence} that are
021 * {@code null} safe.
022 *
023 * @see CharSequence
024 * @since 3.0
025 */
026public class CharSequenceUtils {
027
028    private static final int NOT_FOUND = -1;
029
030    static final int TO_STRING_LIMIT = 16;
031
032    private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
033        for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
034            if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
035                return false;
036            }
037        }
038        return true;
039    }
040
041    /**
042     * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
043     *
044     * @param cs         the {@link CharSequence} to be processed
045     * @param searchChar the {@link CharSequence} to be searched for
046     * @param start      the start index
047     * @return the index where the search sequence was found, or {@code -1} if there is no such occurrence.
048     */
049    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
050        if (cs == null || searchChar == null) {
051            return StringUtils.INDEX_NOT_FOUND;
052        }
053        if (cs instanceof String) {
054            return ((String) cs).indexOf(searchChar.toString(), start);
055        }
056        if (cs instanceof StringBuilder) {
057            return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
058        }
059        if (cs instanceof StringBuffer) {
060            return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
061        }
062        return cs.toString().indexOf(searchChar.toString(), start);
063//        if (cs instanceof String && searchChar instanceof String) {
064//            // TODO: Do we assume searchChar is usually relatively small;
065//            //       If so then calling toString() on it is better than reverting to
066//            //       the green implementation in the else block
067//            return ((String) cs).indexOf((String) searchChar, start);
068//        } else {
069//            // TODO: Implement rather than convert to String
070//            return cs.toString().indexOf(searchChar.toString(), start);
071//        }
072    }
073
074    /**
075     * Returns the index within {@code cs} of the first occurrence of the
076     * specified character, starting the search at the specified index.
077     * <p>
078     * If a character with value {@code searchChar} occurs in the
079     * character sequence represented by the {@code cs}
080     * object at an index no smaller than {@code start}, then
081     * the index of the first such occurrence is returned. For values
082     * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
083     * this is the smallest value <em>k</em> such that:
084     * </p>
085     * <blockquote><pre>
086     * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
087     * </pre></blockquote>
088     * is true. For other values of {@code searchChar}, it is the
089     * smallest value <em>k</em> such that:
090     * <blockquote><pre>
091     * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &gt;= start)
092     * </pre></blockquote>
093     * <p>
094     * is true. In either case, if no such character occurs inm {@code cs}
095     * at or after position {@code start}, then
096     * {@code -1} is returned.
097     * </p>
098     * <p>
099     * There is no restriction on the value of {@code start}. If it
100     * is negative, it has the same effect as if it were zero: the entire
101     * {@link CharSequence} may be searched. If it is greater than
102     * the length of {@code cs}, it has the same effect as if it were
103     * equal to the length of {@code cs}: {@code -1} is returned.
104     * </p>
105     * <p>All indices are specified in {@code char} values
106     * (Unicode code units).
107     * </p>
108     *
109     * @param cs  the {@link CharSequence} to be processed, not null
110     * @param searchChar  the char to be searched for
111     * @param start  the start index, negative starts at the string start
112     * @return the index where the search char was found, -1 if not found
113     * @since 3.6 updated to behave more like {@link String}
114     */
115    static int indexOf(final CharSequence cs, final int searchChar, int start) {
116        if (cs instanceof String) {
117            return ((String) cs).indexOf(searchChar, start);
118        }
119        final int sz = cs.length();
120        if (start < 0) {
121            start = 0;
122        }
123        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
124            for (int i = start; i < sz; i++) {
125                if (cs.charAt(i) == searchChar) {
126                    return i;
127                }
128            }
129            return NOT_FOUND;
130        }
131        //supplementary characters (LANG1300)
132        if (searchChar <= Character.MAX_CODE_POINT) {
133            final char[] chars = Character.toChars(searchChar);
134            for (int i = start; i < sz - 1; i++) {
135                final char high = cs.charAt(i);
136                final char low = cs.charAt(i + 1);
137                if (high == chars[0] && low == chars[1]) {
138                    return i;
139                }
140            }
141        }
142        return NOT_FOUND;
143    }
144
145    /**
146     * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
147     *
148     * @param cs the {@link CharSequence} to be processed
149     * @param searchChar the {@link CharSequence} to find
150     * @param start the start index
151     * @return the index where the search sequence was found
152     */
153    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
154        if (searchChar == null || cs == null) {
155            return NOT_FOUND;
156        }
157        if (searchChar instanceof String) {
158            if (cs instanceof String) {
159                return ((String) cs).lastIndexOf((String) searchChar, start);
160            }
161            if (cs instanceof StringBuilder) {
162                return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
163            }
164            if (cs instanceof StringBuffer) {
165                return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
166            }
167        }
168
169        final int len1 = cs.length();
170        final int len2 = searchChar.length();
171
172        if (start > len1) {
173            start = len1;
174        }
175
176        if (start < 0 || len2 > len1) {
177            return NOT_FOUND;
178        }
179
180        if (len2 == 0) {
181            return start;
182        }
183
184        if (len2 <= TO_STRING_LIMIT) {
185            if (cs instanceof String) {
186                return ((String) cs).lastIndexOf(searchChar.toString(), start);
187            }
188            if (cs instanceof StringBuilder) {
189                return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
190            }
191            if (cs instanceof StringBuffer) {
192                return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
193            }
194        }
195
196        if (start + len2 > len1) {
197            start = len1 - len2;
198        }
199
200        final char char0 = searchChar.charAt(0);
201
202        int i = start;
203        while (true) {
204            while (cs.charAt(i) != char0) {
205                i--;
206                if (i < 0) {
207                    return NOT_FOUND;
208                }
209            }
210            if (checkLaterThan1(cs, searchChar, len2, i)) {
211                return i;
212            }
213            i--;
214            if (i < 0) {
215                return NOT_FOUND;
216            }
217        }
218    }
219
220    /**
221     * Returns the index within {@code cs} of the last occurrence of
222     * the specified character, searching backward starting at the
223     * specified index. For values of {@code searchChar} in the range
224     * from 0 to 0xFFFF (inclusive), the index returned is the largest
225     * value <em>k</em> such that:
226     * <blockquote><pre>
227     * (this.charAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
228     * </pre></blockquote>
229     * is true. For other values of {@code searchChar}, it is the
230     * largest value <em>k</em> such that:
231     * <blockquote><pre>
232     * (this.codePointAt(<em>k</em>) == searchChar) &amp;&amp; (<em>k</em> &lt;= start)
233     * </pre></blockquote>
234     * is true. In either case, if no such character occurs in {@code cs}
235     * at or before position {@code start}, then {@code -1} is returned.
236     *
237     * <p>
238     * All indices are specified in {@code char} values
239     * (Unicode code units).
240     * </p>
241     *
242     * @param cs  the {@link CharSequence} to be processed
243     * @param searchChar  the char to be searched for
244     * @param start  the start index, negative returns -1, beyond length starts at end
245     * @return the index where the search char was found, -1 if not found
246     * @since 3.6 updated to behave more like {@link String}
247     */
248    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
249        if (cs instanceof String) {
250            return ((String) cs).lastIndexOf(searchChar, start);
251        }
252        final int sz = cs.length();
253        if (start < 0) {
254            return NOT_FOUND;
255        }
256        if (start >= sz) {
257            start = sz - 1;
258        }
259        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
260            for (int i = start; i >= 0; --i) {
261                if (cs.charAt(i) == searchChar) {
262                    return i;
263                }
264            }
265            return NOT_FOUND;
266        }
267        //supplementary characters (LANG1300)
268        //NOTE - we must do a forward traversal for this to avoid duplicating code points
269        if (searchChar <= Character.MAX_CODE_POINT) {
270            final char[] chars = Character.toChars(searchChar);
271            //make sure it's not the last index
272            if (start == sz - 1) {
273                return NOT_FOUND;
274            }
275            for (int i = start; i >= 0; i--) {
276                final char high = cs.charAt(i);
277                final char low = cs.charAt(i + 1);
278                if (chars[0] == high && chars[1] == low) {
279                    return i;
280                }
281            }
282        }
283        return NOT_FOUND;
284    }
285
286    /**
287     * Green implementation of regionMatches.
288     *
289     * @param cs the {@link CharSequence} to be processed
290     * @param ignoreCase whether or not to be case-insensitive
291     * @param thisStart the index to start on the {@code cs} CharSequence
292     * @param substring the {@link CharSequence} to be looked for
293     * @param start the index to start on the {@code substring} CharSequence
294     * @param length character length of the region
295     * @return whether the region matched
296     * @see String#regionMatches(boolean, int, String, int, int)
297     */
298    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
299            final CharSequence substring, final int start, final int length)    {
300        if (cs instanceof String && substring instanceof String) {
301            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
302        }
303        int index1 = thisStart;
304        int index2 = start;
305        int tmpLen = length;
306
307        // Extract these first so we detect NPEs the same as the java.lang.String version
308        final int srcLen = cs.length() - thisStart;
309        final int otherLen = substring.length() - start;
310
311        // Check for invalid parameters
312        if (thisStart < 0 || start < 0 || length < 0) {
313            return false;
314        }
315
316        // Check that the regions are long enough
317        if (srcLen < length || otherLen < length) {
318            return false;
319        }
320
321        while (tmpLen-- > 0) {
322            final char c1 = cs.charAt(index1++);
323            final char c2 = substring.charAt(index2++);
324
325            if (c1 == c2) {
326                continue;
327            }
328
329            if (!ignoreCase) {
330                return false;
331            }
332
333            // The real same check as in String#regionMatches(boolean, int, String, int, int):
334            final char u1 = Character.toUpperCase(c1);
335            final char u2 = Character.toUpperCase(c2);
336            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
337                return false;
338            }
339        }
340
341        return true;
342    }
343
344    /**
345     * Returns a new {@link CharSequence} that is a subsequence of this
346     * sequence starting with the {@code char} value at the specified index.
347     *
348     * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
349     * The length (in {@code char}) of the returned sequence is {@code length() - start},
350     * so if {@code start == end} then an empty sequence is returned.</p>
351     *
352     * @param cs  the specified subsequence, null returns null
353     * @param start  the start index, inclusive, valid
354     * @return a new subsequence, may be null
355     * @throws IndexOutOfBoundsException if {@code start} is negative or if
356     *  {@code start} is greater than {@code length()}
357     */
358    public static CharSequence subSequence(final CharSequence cs, final int start) {
359        return cs == null ? null : cs.subSequence(start, cs.length());
360    }
361
362    /**
363     * Converts the given CharSequence to a char[].
364     *
365     * @param source the {@link CharSequence} to be processed.
366     * @return the resulting char array, never null.
367     * @since 3.11
368     */
369    public static char[] toCharArray(final CharSequence source) {
370        final int len = StringUtils.length(source);
371        if (len == 0) {
372            return ArrayUtils.EMPTY_CHAR_ARRAY;
373        }
374        if (source instanceof String) {
375            return ((String) source).toCharArray();
376        }
377        final char[] array = new char[len];
378        for (int i = 0; i < len; i++) {
379            array[i] = source.charAt(i);
380        }
381        return array;
382    }
383
384    /**
385     * {@link CharSequenceUtils} instances should NOT be constructed in
386     * standard programming.
387     *
388     * <p>This constructor is public to permit tools that require a JavaBean
389     * instance to operate.</p>
390     *
391     * @deprecated TODO Make private in 4.0.
392     */
393    @Deprecated
394    public CharSequenceUtils() {
395        // empty
396    }
397}