1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache license, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the license for the specific language governing permissions and
15 * limitations under the license.
16 */
17 package org.apache.logging.log4j.core.util;
18
19 import java.io.UnsupportedEncodingException;
20 import java.nio.charset.Charset;
21 import java.nio.charset.StandardCharsets;
22
/**
 * Encodes Strings to bytes.
 *
 * @since 2.5
 */
public final class StringEncoder {

    private StringEncoder() {
    }

    /**
     * Converts a String to a byte[].
     *
     * @param str if null, return null.
     * @param charset if null, use the default charset.
     * @return a byte[], or {@code null} if {@code str} is {@code null}
     */
    public static byte[] toBytes(final String str, final Charset charset) {
        if (str == null) {
            return null;
        }
        // ISO-8859-1 has a dedicated fast path that avoids the charset encoder machinery.
        if (StandardCharsets.ISO_8859_1.equals(charset)) {
            return encodeSingleByteChars(str);
        }
        final Charset actual = charset != null ? charset : Charset.defaultCharset();
        try { // LOG4J2-935: String.getBytes(String) gives better performance
            return str.getBytes(actual.name());
        } catch (final UnsupportedEncodingException e) {
            // The name lookup failed even though we hold a Charset instance; use the instance directly.
            return str.getBytes(actual);
        }
    }

    /**
     * Encodes the specified char sequence as single bytes. Characters in the range 0..255 are cast to a byte;
     * any other character is replaced by {@code '?'}. A high surrogate immediately followed by a low surrogate
     * is replaced by a single {@code '?'}, so the returned array may be shorter than the char sequence.
     *
     * @param s the char sequence to encode
     * @return the encoded bytes; the array length equals the number of bytes actually written
     * @see <a href="https://issues.apache.org/jira/browse/LOG4J2-1151">LOG4J2-1151</a>
     */
    public static byte[] encodeSingleByteChars(final CharSequence s) {
        final int length = s.length();
        final byte[] result = new byte[length];
        final int written = encodeString(s, 0, length, result);
        if (written == length) {
            return result;
        }
        // Surrogate pairs collapse two chars into one byte; drop the unused tail instead of
        // returning trailing zero bytes.
        final byte[] trimmed = new byte[written];
        System.arraycopy(result, 0, trimmed, 0, written);
        return trimmed;
    }

    // LOG4J2-1151
    /**
     * Copies characters to bytes until a character above 255 is found or {@code length} characters were copied.
     * <p>
     * Implementation note: this is the fast path. If the char array contains only ISO-8859-1 characters, all the
     * work will be done here.
     * </p>
     *
     * @param charArray the characters to encode
     * @param charIndex index of the first character to read
     * @param byteArray the destination buffer
     * @param byteIndex index of the first byte to write
     * @param length the maximum number of characters to copy
     * @return the number of characters copied (equal to the number of bytes written)
     */
    public static int encodeIsoChars(final CharSequence charArray, int charIndex, final byte[] byteArray,
            int byteIndex, final int length) {
        int i = 0;
        for (; i < length; i++) {
            final char c = charArray.charAt(charIndex++);
            if (c > 255) {
                // Not representable in a single byte: let the caller deal with it.
                break;
            }
            byteArray[(byteIndex++)] = ((byte) c);
        }
        return i;
    }

    // LOG4J2-1151
    /**
     * Encodes characters into {@code byteArray}, starting at byte index 0. Characters above 255 are written as
     * {@code '?'}; a high surrogate directly followed by a low surrogate inside the processed range produces a
     * single {@code '?'}. Encoding stops when the requested characters are consumed or the buffer is full.
     *
     * @param charArray the characters to encode
     * @param charOffset index of the first character to read
     * @param charLength the number of characters to encode
     * @param byteArray the destination buffer
     * @return the number of bytes written to {@code byteArray}
     */
    public static int encodeString(final CharSequence charArray, int charOffset, int charLength,
            final byte[] byteArray) {
        int byteOffset = 0;
        // Never process more characters than the destination can hold.
        int length = Math.min(charLength, byteArray.length);
        int charDoneIndex = charOffset + length;
        while (charOffset < charDoneIndex) {
            final int done = encodeIsoChars(charArray, charOffset, byteArray, byteOffset, length);
            charOffset += done;
            byteOffset += done;
            if (done != length) {
                // The fast path stopped early at a character above 255.
                final char c = charArray.charAt(charOffset++);
                if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex)
                        && (Character.isLowSurrogate(charArray.charAt(charOffset)))) {
                    if (charLength > byteArray.length) {
                        // The pair uses two chars but only one byte, so when the input was truncated
                        // to the buffer size there is room to consume one more character.
                        charDoneIndex++;
                        charLength--;
                    }
                    // Skip the low surrogate: the whole pair becomes one replacement byte.
                    charOffset++;
                }
                byteArray[(byteOffset++)] = '?';
                length = Math.min(charDoneIndex - charOffset, byteArray.length - byteOffset);
            }
        }
        return byteOffset;
    }
}