DataOutputStream使用writeBytes写入中文数据时服务端接收到的是乱码

在开发过程中发现使用HttpURLConnection发送POST请求，需要同时上传文件和其他参数，参数中如果有中文字符，DataOutputStream使用writeBytes，将中文字符传入的话，服务端接收到的字符串会乱码，但是使用write就不会，故截取了一部分代码进行测试，看看到底是什么原因，方便日后遇到同样问题能够作为参考：

/*
 *
 * 模拟服务端
 *
 */
package cn.dk.test.web.controller;

import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RestController;

/**
 * Minimal REST controller that plays the server side of the experiment.
 *
 * <p>It exposes a plain GET endpoint and a POST endpoint that echoes the
 * received {@code name} value to standard output.
 */
@RestController
public class ImitationHttpController {

	/**
	 * Handles GET requests mapped to {@code demo}.
	 *
	 * @return a fixed demo string
	 */
	@GetMapping("demo")
	public String demo() {
		final String body = "this's just a demo";
		return body;
	}

	/**
	 * Handles POST requests mapped to {@code zh} and prints the received
	 * value so that any garbling can be seen on the server console.
	 *
	 * @param name the submitted value; presumably bound by Spring from the
	 *             request parameter of the same name (confirm against the
	 *             application's configuration)
	 * @return the literal string {@code "OK"}
	 */
	@PostMapping("zh")
	public String ChineseTransmission(String name) {
		System.out.println(name);
		final String reply = "OK";
		return reply;
	}
}

其中DataOutputStream这个类为了方便找出原因所在，我把源码拷了一份出来，请求类中用的是拷贝的那个类，并将writeBytes方法稍微改得容易看一点。可以看到源码中将字符串参数s的每个字符强制转换为byte类型（占1个字节），而char类型占2个字节；用char存放中文字符时，这个强制转换只保留低8位，必然会丢失高8位的数据。

/**
     * Writes the string to the underlying output stream as a sequence of
     * bytes, one byte per character, keeping only the low eight bits of
     * each character (the high eight bits are discarded by the cast).
     *
     * <p>This copy is instrumented for diagnosis: it prints the whole
     * string, a marker line, and then for every character the character
     * itself, its binary form as a {@code char}, and the binary form of
     * the {@code byte} it was narrowed to. After all bytes are written,
     * {@code incCount} is called with the length of <code>s</code>.
     *
     * @param      s   a string of bytes to be written.
     * @exception  IOException  if an I/O error occurs.
     * @see        java.io.FilterOutputStream#out
     */
    public final void writeBytes(String s) throws IOException {
        final int total = s.length();
        System.out.println(s);
        System.out.println("======开始char转换====");
        int index = 0;
        while (index < total) {
            final char current = s.charAt(index);
            // Narrowing cast: only the low 8 bits of the 16-bit char survive.
            final byte narrowed = (byte) current;
            System.out.println(current);
            System.out.println(Integer.toBinaryString(current));
            // The byte is widened back to int here, so a negative byte prints as 32 bits.
            System.out.println(Integer.toBinaryString(narrowed));
            out.write(narrowed);
            index++;
        }
        incCount(total);
    }

/*
 *
 * 模拟请求
 *
 */
package cn.dk.test.web.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.Map.Entry;

import cn.dk.test.web.origin.DataOutputStream;

/**
 * Demo client that posts a multipart/form-data request containing Chinese
 * text, used to reproduce the garbling caused by
 * {@code DataOutputStream.writeBytes}.
 */
public class Test {
	/**
	 * Sends a single form field with a Chinese value to the local demo endpoint.
	 */
	public static void main(String[] args) throws Exception {
		String url = "http://127.0.0.1:8080/zh";
		Map<String, String> requestText = new HashMap<String, String>();
		requestText.put("name", "中文");
		HttpRequest(url, requestText);
	}

	/** Multipart boundary marker: a random UUID, lower-cased, with dashes removed. */
	private final static String BOUNDARY = UUID.randomUUID().toString()
			.toLowerCase().replaceAll("-", "");
	/** Two dashes that must precede every boundary line. */
	private final static String PREFIX = "--";
	/** Line terminator used between multipart header and body lines. */
	private final static String LINE_END = "\r\n";

	/**
	 * Posts the given text fields as a multipart/form-data request and
	 * returns the response body with line breaks removed.
	 *
	 * <p>Any exception is printed and swallowed; in that case whatever part
	 * of the response was read so far is returned, which is the empty string
	 * when the request failed before a response was available. The method
	 * never returns {@code null}.
	 *
	 * @param requestUrl  target URL of the POST request
	 * @param requestText form field names mapped to their text values
	 * @return the concatenated response lines, or an empty string on failure
	 */
	public static String HttpRequest(String requestUrl,
			Map<String, String> requestText) {

		HttpURLConnection conn = null;
		DataOutputStream dos = null;
		BufferedReader br = null;
		// Allocated up front so the final return is safe even if the request fails early.
		StringBuilder response = new StringBuilder();
		try {
			URL url = new URL(requestUrl);
			// A proxy can be plugged in here:
			// Proxy proxy = new Proxy(Proxy.Type.HTTP, new
			// InetSocketAddress("127.0.0.1",10808));
			// conn = (HttpURLConnection) url.openConnection(proxy);
			conn = (HttpURLConnection) url.openConnection();
			conn.setDoOutput(true);
			conn.setDoInput(true);
			conn.setUseCaches(false);
			conn.setConnectTimeout(1000 * 10);
			conn.setReadTimeout(1000 * 10);
			conn.setRequestMethod("POST");
			conn.setRequestProperty("Accept", "*/*");
			conn.setRequestProperty("Connection", "keep-alive");
			conn.setRequestProperty("User-Agent",
					"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
			conn.setRequestProperty("Charset", "UTF-8");
			conn.setRequestProperty("Content-Type",
					"multipart/form-data; boundary=" + BOUNDARY);
			conn.connect();

			// Write the request body, i.e. the parameters carried by the HTTP request.
			dos = new DataOutputStream(conn.getOutputStream());
			// NOTE: writeBytes is used on purpose to reproduce the garbling; it keeps
			// only the low 8 bits of each char. For correct transmission use
			// dos.write(text.getBytes("UTF-8")) instead.
			dos.writeBytes(buildTextParts(requestText));
			dos.flush();

			// Closing boundary that marks the end of the request body.
			String endTarget = PREFIX + BOUNDARY + PREFIX + LINE_END;
			dos.write(endTarget.getBytes("UTF-8"));
			dos.flush();

			// Read what the server sent back.
			System.out
					.println("======================响应体=========================");
			int status = conn.getResponseCode();
			System.out.println("ResponseCode:" + status
					+ ",ResponseMessage:" + conn.getResponseMessage());
			InputStream input = (status == 200) ? conn.getInputStream()
					: conn.getErrorStream();

			// getErrorStream() may return null when the server sent no error body.
			if (input != null) {
				br = new BufferedReader(new InputStreamReader(input, "UTF-8"));
				String line;
				while ((line = br.readLine()) != null) {
					response.append(line);
				}
			}
			System.out.println("返回报文:" + response.toString());

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Close the streams first, each independently, then drop the connection.
			if (br != null) {
				try {
					br.close();
				} catch (IOException ex) {
					ex.printStackTrace();
				}
			}
			if (dos != null) {
				try {
					dos.close();
				} catch (IOException ex) {
					ex.printStackTrace();
				}
			}
			if (conn != null) {
				conn.disconnect();
			}
		}
		return response.toString();
	}

	/** Builds the multipart text parts (boundary line, part headers, blank line, value) for all fields. */
	private static String buildTextParts(Map<String, String> requestText) {
		StringBuilder requestParams = new StringBuilder();
		for (Map.Entry<String, String> entry : requestText.entrySet()) {
			requestParams.append(PREFIX).append(BOUNDARY).append(LINE_END);
			requestParams.append("Content-Disposition: form-data; name=\"")
					.append(entry.getKey()).append("\"").append(LINE_END);
			requestParams.append("Content-Type: text/plain; charset=utf-8")
					.append(LINE_END);
			requestParams.append("Content-Transfer-Encoding: 8bit").append(
					LINE_END);
			// The part headers must be followed by an empty line before the value.
			requestParams.append(LINE_END);
			requestParams.append(entry.getValue());
			requestParams.append(LINE_END);
		}
		return requestParams.toString();
	}
}

果不其然，服务端显示的是乱码

果不其然

然后再看看之前修改的源码执行代码打印的结果

--bc81cb81f647476b927f414ce0e0116d
Content-Disposition: form-data; name="name"
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit

中文

======开始char转换====
-
101101
101101
-
101101
101101
b
1100010
1100010
c
1100011
1100011
8
111000
111000
1
110001
110001
c
1100011
1100011
b
1100010
1100010
8
111000
111000
1
110001
110001
f
1100110
1100110
6
110110
110110
4
110100
110100
7
110111
110111
4
110100
110100
7
110111
110111
6
110110
110110
b
1100010
1100010
9
111001
111001
2
110010
110010
7
110111
110111
f
1100110
1100110
4
110100
110100
1
110001
110001
4
110100
110100
c
1100011
1100011
e
1100101
1100101
0
110000
110000
e
1100101
1100101
0
110000
110000
1
110001
110001
1
110001
110001
6
110110
110110
d
1100100
1100100


1101
1101


1010
1010
C
1000011
1000011
o
1101111
1101111
n
1101110
1101110
t
1110100
1110100
e
1100101
1100101
n
1101110
1101110
t
1110100
1110100
-
101101
101101
D
1000100
1000100
i
1101001
1101001
s
1110011
1110011
p
1110000
1110000
o
1101111
1101111
s
1110011
1110011
i
1101001
1101001
t
1110100
1110100
i
1101001
1101001
o
1101111
1101111
n
1101110
1101110
:
111010
111010
 
100000
100000
f
1100110
1100110
o
1101111
1101111
r
1110010
1110010
m
1101101
1101101
-
101101
101101
d
1100100
1100100
a
1100001
1100001
t
1110100
1110100
a
1100001
1100001
;
111011
111011
 
100000
100000
n
1101110
1101110
a
1100001
1100001
m
1101101
1101101
e
1100101
1100101
=
111101
111101
"
100010
100010
n
1101110
1101110
a
1100001
1100001
m
1101101
1101101
e
1100101
1100101
"
100010
100010


1101
1101


1010
1010
C
1000011
1000011
o
1101111
1101111
n
1101110
1101110
t
1110100
1110100
e
1100101
1100101
n
1101110
1101110
t
1110100
1110100
-
101101
101101
T
1010100
1010100
y
1111001
1111001
p
1110000
1110000
e
1100101
1100101
:
111010
111010
 
100000
100000
t
1110100
1110100
e
1100101
1100101
x
1111000
1111000
t
1110100
1110100
/
101111
101111
p
1110000
1110000
l
1101100
1101100
a
1100001
1100001
i
1101001
1101001
n
1101110
1101110
;
111011
111011
 
100000
100000
c
1100011
1100011
h
1101000
1101000
a
1100001
1100001
r
1110010
1110010
s
1110011
1110011
e
1100101
1100101
t
1110100
1110100
=
111101
111101
u
1110101
1110101
t
1110100
1110100
f
1100110
1100110
-
101101
101101
8
111000
111000


1101
1101


1010
1010
C
1000011
1000011
o
1101111
1101111
n
1101110
1101110
t
1110100
1110100
e
1100101
1100101
n
1101110
1101110
t
1110100
1110100
-
101101
101101
T
1010100
1010100
r
1110010
1110010
a
1100001
1100001
n
1101110
1101110
s
1110011
1110011
f
1100110
1100110
e
1100101
1100101
r
1110010
1110010
-
101101
101101
E
1000101
1000101
n
1101110
1101110
c
1100011
1100011
o
1101111
1101111
d
1100100
1100100
i
1101001
1101001
n
1101110
1101110
g
1100111
1100111
:
111010
111010
 
100000
100000
8
111000
111000
b
1100010
1100010
i
1101001
1101001
t
1110100
1110100


1101
1101


1010
1010


1101
1101


1010
1010
中
100111000101101
101101
文
110010110000111
11111111111111111111111110000111


1101
1101


1010
1010
======================响应体=========================
ResponseCode:200,ResponseMessage:null
返回报文:OK

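重点看下“中”“文”这两个字符的数据丢失情况：char强制转换成byte后高8位被丢弃，只剩低8位。“文”转换后的输出之所以反而比原来更长，是因为它的低8位是10000111，最高位为1，作为有符号的byte是负数；Integer.toBinaryString在打印前会先把byte符号扩展成32位的int，所以输出的是32位的补码形式。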

最后可以使用write(byte[] b)来达到正确传输中文的目的，这就需要在传入之前先将字符串按指定编码（如UTF-8）转换成byte[]，例如：dos.write(str.getBytes("UTF-8"))

DataOutputStream使用writeBytes写入中文数据时服务端接收到的是乱码

猜你喜欢