简易代理服务器-学习日志_网络编程|并发编程实验

吐槽：
昨天去了一家贵阳的公司笔试面试C语言软件开发偏嵌入式的，本着离家近，好久没笔试面试想去练练手就去了，笔试基本上没问题自我感觉答得挺好的，除了最后一个题要求写两个效率高的排序，在写堆排序的时候一些小细节写错了（回去跑自己写的代码调试才发现出错改正过来），然后面试我的人好像是一个小组长，按着简历问了一些问题，然后就一直说我这个专业不太好找工作以及我专业其他同学的就业去向（一直说着我也很无奈），问我的技术方面的问题没几个或者说没有，最后问了一个你怎么看堆栈，我不知道他说的是从哪方面看，就问了一下指的是哪方面，他以为我不知道他说的是哪个堆栈然后就说了不是数据结构的那个然后让我回答，我就简单了说了一些new/delete和malloc/free以及函数的调用，然后就结束了..
第二天，收到的消息是被拒了，也没有给出被拒的原因，本来是打算9月份去找校招的，没想到第一次出师就失败了，感觉是学的专业被这群人抛弃了还有嵌入式方面我的确不太了解而面试官也没问这方面的问题，有点小难过，也深深的后悔没在大学的时候去好好的找工作，现在毕业了（18届），校招似乎都只招19届的学生，如果9~10月份找不到就只能去考研了~_~….

正文：
这篇相当于是学习日志_网络编程|并发编程的实验，距离上篇日志拖了很长时间，因为这个实验的过程还是挺简单的，所以就一直没有去动手做。
而这篇章也就是书的结尾了，原先是因为不太明白《unix环境高级编程》，所以就来看《深入理解计算机系统》，这本书介绍了很多基础知识以及有趣的实验（至少能让迷茫的我找点事做），也学习了很多C语言的知识、汇编知识、一些操作系统知识与硬件知识，至少比学习之前的我更加懂得了为什么去使用一些函数，以及其实现的原理，这些都还只是基础，还有很长的路要走，接下来看情况去看《unix环境高级编程》这本书吧。
说起代理服务器，以前不懂电脑的时候，一直不知道这个名词是什么，其实也就相当于一个中转站，客户端发送请求到代理服务器，然后代理服务器把请求转交给真正的服务器端，似乎一些网游加速器，或者是翻墙软件，Fiddler都是这样实现的，当然本次只是写一个简易的代理服务器，实现很简单，而比如网游加速器，如果使用代理服务器，那么就要编写一套LSP（分层服务提供商）以及控制路由表什么的，这些我自己也不太清楚，只是大概了解过程。
本次编写的是一个多线程http代理服务器，处理GET请求并转发。
因为要方便实验测试，所以使用的是纯C的代码去编写，和上篇日志中所使用的C++写法与使用的线程库不同。

调试工具
Telnet: 不安全的 ssh，需要手动构造 HTTP 请求，如果想要测试非法的 header，这个功能就很有用
man telnet
telnet www.wdxtub.com
GET http://www.wdxtub.com HTTP/1.0
cURL: 会自动构建 HTTP 请求
curl http://www.wdxtub.com
代理模式 curl --proxy lemonshark.ics.cs.cmu.edu:3092 http://www.wdxtub.com
netcat: 多用途网络工具，用法与 telnet 类似
nc catshark.ics.cs.cmu.edu 12345
GET http://www.cmu.edu/hub/index.html HTTP/1.0

我自己是使用netcat/nc占用一个端口当作侦测，然后再使用curl去传输两个地址，然后就可以看到shell上输出相应信息，如下图。
这里写图片描述

代码本身很简单，难点的话，在于如何对字符串进行操作，以及实验要求填写http请求头，如客户端发送的请求头可能包含Host、User-Agent、Connection、Proxy-Connection等的信息，如果客户端发送的请求头足够了，我们可以不用build_headr，但是如果客户端没有发送附带的这些，我们就添加，让转发到的目的服务器知道我们在做什么。
该代理服务器也可以再优化一下，增加一个缓存机制以及算法，存储部分客户端请求url所得到的内容，当其他的客户端访问的时候直接访问缓存即可，但是因为程序是多线程执行，所以需要考虑使用互斥量与同步量。
以下代码是写者优先模式：
这种模式的缺点就是，要是大部分客户端请求的url在缓存中都存有的话，那么代理服务器会一直处于读者模式服务于从缓存中返还内容给客户端，而其他客户端请求的url内容在缓存中没有的话，需要等待读者模式结束，否则就永远被阻塞，同理写者模式与其相同。
如果以下内容想要mutex实现的话，mutex控制readcnt的修改，控制变量去控制读者数量，如果读者数量>0，就wait写者线程，否则就唤醒线程。

/*
 * Writer-priority readers/writers sketch for the proxy cache (pseudo-code:
 * the "..." lines stand for the request handling that is not shown).
 *
 * Readers are threads that answer a request from the cache; writers are
 * threads that store a freshly fetched object into the cache.
 *
 * The counters and the gates use separate semaphores. If one semaphore both
 * guarded a counter and acted as the reader/writer gate, a first reader and
 * a first writer could each hold one semaphore while waiting for the other,
 * which is a deadlock.
 *
 * P/V are assumed to be the csapp.h wrappers taking a sem_t * -- TODO confirm.
 * The semaphores are deliberately not called "read"/"write", because those
 * names would collide with the POSIX read()/write() functions used below.
 */
int readcnt;        /* number of threads currently reading the cache */
int writecnt;       /* number of threads writing or waiting to write */
sem_t rcnt_mutex;   /* guards readcnt; initialized to 1 */
sem_t wcnt_mutex;   /* guards writecnt; initialized to 1 */
sem_t entry_mutex;  /* lets only one reader at a time queue on read_gate; initialized to 1 */
sem_t read_gate;    /* held while any writer is active or waiting, blocks new readers; initialized to 1 */
sem_t write_lock;   /* exclusive access to the cache contents; initialized to 1 */
void dosomething(int cond){
    ...
    ...
    /* NOTE(review): cacheFind itself inspects the cache outside the lock, as in the original sketch. */
    if(cacheFind(cache,url)){
        /* Reader entry: wait behind any writer, then register as a reader. */
        P(&entry_mutex);
        P(&read_gate);
        P(&rcnt_mutex);
        readcnt++;
        if(readcnt==1){
            /* The first reader locks writers out on behalf of all readers. */
            P(&write_lock);
        }
        V(&rcnt_mutex);
        V(&read_gate);
        V(&entry_mutex);
        write(cond,buf,MAXLINE);
        /* Reader exit. */
        P(&rcnt_mutex);
        readcnt--;
        /* Writing is only allowed once there are no readers left. */
        if(readcnt==0){
            V(&write_lock);
        }
        V(&rcnt_mutex);
        return;
    }
    ...
    ...
    /* Writer entry: the first writer closes the gate for new readers. */
    P(&wcnt_mutex);
    writecnt++;
    if(writecnt==1){
        P(&read_gate);
    }
    V(&wcnt_mutex);
    /* Writers update the cache one at a time, and never alongside readers. */
    P(&write_lock);
    saveCache(url,content);
    V(&write_lock);
    /* Writer exit: the last writer reopens the gate for readers. */
    P(&wcnt_mutex);
    writecnt--;
    if(writecnt==0){
        V(&read_gate);
    }
    V(&wcnt_mutex);
}

HTTP/1.1 里默认将 connection 定义为 keep-alive，也就是一条 TCP
连接可以处理多个请求，不用每次都要重新建立 TCP 连接。我们的简易 proxy 还无法提供这样的功能，所以在读 client 发过来的
header 的时候，如果是 Connection: keep-alive 或者 Proxy-Connection:
keep-alive，我们都要把它们换成 Connection: close 或 Proxy-Connection: close。

PS：由于不知道实验做什么，看了实验文件的readme也不太清楚，所以就去看了别人的实验报告，所以代码大概差别不大，毕竟这个思路一样，代码不会差别得太多。
代理服务器代码：

#include <stdio.h>
#include"csapp.h"

/* Recommended upper bounds for the whole cache and for one cached object. */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400

/* Header names looked for in the client's request. */
static const char *host_key = "Host";
static const char *connection_key = "Connection";
static const char *proxy_connection_key = "Proxy-Connection";
static const char *user_agent_key= "User-Agent";

/* printf-style templates for the request line and the Host header. */
static const char *requestlint_hdr_format = "GET %s HTTP/1.0\r\n";
static const char *host_hdr_format = "Host: %s\r\n";

/* Fixed header lines the proxy sends to the origin server. */
static const char *conn_hdr = "Connection: close\r\n";
static const char *prox_hdr = "Proxy-Connection: close\r\n";
/* This line is intentionally long; it is kept on a single line. */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";

/* Blank line that terminates an HTTP header section. */
static const char *endof_hdr = "\r\n";

/* Shorthand for the generic socket address type used in casts. */
typedef struct sockaddr SA;

/* Thread start routine; it has the void *(*)(void *) shape pthread_create requires. */
void *doit(void *fd);
void dosomething(int fd);
void parseUrl(char *url,char *host,char *path,int *port);
void build_headr(char *dst,char *hostname,char *path,rio_t *clientio);

/*
 * Proxy entry point.
 *
 * Usage: <program> <port>
 *
 * Listens on the given port and hands every accepted connection to a new
 * thread running doit(). Returns 1 when the arguments are wrong or the
 * listening socket cannot be opened; otherwise it loops forever.
 */
int main(int argc,char **args)
{
    char hostname[MAXLINE],port[MAXLINE];
    struct sockaddr_storage clientaddr;
    socklen_t clientlen;
    pthread_t tid;
    int serd;
    int *cond;

    if(argc!=2){
        fprintf(stderr,"usage: %s <port>\n",argc>0?args[0]:"proxy");
        return 1;
    }
    /* Ignore SIGPIPE so that writing to a closed socket does not kill the process. */
    Signal(SIGPIPE,SIG_IGN);
    serd=open_listenfd(args[1]);
    if(serd<0){
        fprintf(stderr,"cannot listen on port %s\n",args[1]);
        return 1;
    }
    while(1){
        /* accept() overwrites clientlen, so it must be reset on every iteration. */
        clientlen=sizeof(clientaddr);
        /* Each connection gets its own heap slot for the descriptor so the
         * peer thread never races with the next accept() in this loop. */
        cond=(int*)Malloc(sizeof *cond);
        *cond=accept(serd,(SA*)&clientaddr,&clientlen);
        if(*cond<0){
            perror("accept");
            free(cond);
            continue;
        }
        Getnameinfo((SA*)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0);
        /* port is a string filled in by Getnameinfo, hence %s. */
        fprintf(stdout,"Accept connection from host: %s , port: %s \n",hostname,port);
        Pthread_create(&tid,NULL,doit,(void*)cond);
    }
    return 0;
}

/*
 * Per-connection thread body.
 *
 * fd points to a heap-allocated int holding the connected descriptor. This
 * function takes ownership of that allocation, serves the request with
 * dosomething(), and closes the descriptor. Always returns NULL.
 */
void *doit(void *fd){
    int cond=*(int*)fd;
    /* The value has been copied, so the slot can be released right away. */
    free(fd);
    /* Detach so the thread's resources are reclaimed without a join. */
    Pthread_detach(pthread_self());
    dosomething(cond);
    close(cond);
    return NULL;
}

/*
 * Serve one client connection: read the request line, forward a GET request
 * to the origin server, and relay the server's response back to the client.
 *
 * fd is the connected client descriptor; the caller closes it. Requests that
 * are unreadable, malformed, not GET, or that carry no host are dropped
 * without a response.
 */
void dosomething(int fd){
    char host[MAXLINE],url[MAXLINE],path[MAXLINE],buf[MAXLINE],method[MAXLINE],version[MAXLINE];
    char sendheadr[MAXLINE],portstr[MAXLINE];
    int endserd;
    /* HTTP port, 80 by default. */
    int port=80;
    /* Signed, so that a -1 read error is not mistaken for a huge length. */
    ssize_t n;
    rio_t cio;
    rio_t conio;

    rio_readinitb(&cio,fd);
    if(rio_readlineb(&cio,buf,MAXLINE)<=0){
        /* The client closed the connection or the read failed. */
        return;
    }
    /* The request line must be parsed before method is inspected. Every
     * token fits, because buf is no larger than any destination buffer. */
    if(sscanf(buf,"%s %s %s",method,url,version)<2){
        printf("Proxy received a malformed request line\n");
        return;
    }
    if(strcasecmp(method,"GET")){
        printf("Proxy does not implement the method");
        return;
    }
    /* Give host and path defined values in case parseUrl leaves one untouched. */
    host[0]='\0';
    strcpy(path,"/");
    parseUrl(url,host,path,&port);
    if(host[0]=='\0'){
        /* There is no host to connect to, e.g. an origin-form request "/index.html". */
        return;
    }
    build_headr(sendheadr,host,path,&cio);

    snprintf(portstr,sizeof(portstr),"%d",port);
    endserd=open_clientfd(host,portstr);
    if(endserd<0){
        fprintf(stdout,"connection falut");
        return;
    }

    rio_readinitb(&conio,endserd);
    /* Send exactly the header text; the terminating NUL is not part of HTTP. */
    if(rio_writen(endserd,sendheadr,strlen(sendheadr))>=0){
        while((n=rio_readlineb(&conio,buf,MAXLINE))>0){
            if(rio_writen(fd,buf,(size_t)n)<0){
                /* The client went away; stop relaying. */
                break;
            }
        }
    }
    close(endserd);
}

/*
 * Split a request URL into host, path and port.
 *
 *   url   input, e.g. "http://host:8080/dir/file" or "host/dir"; not modified.
 *   host  output, receives the host name (empty string if the URL has none).
 *   path  output, receives the path starting at '/', or "/" if the URL has none.
 *   port  output, receives the port after "host:", or 80 when it is absent or
 *         not a number in 1..65535.
 *
 * host and path must each be able to hold strlen(url)+1 bytes (at least 2).
 * Only the host part is searched for ':', so a colon inside the path is not
 * mistaken for a port separator. Bracketed IPv6 literals are not supported.
 */
void parseUrl(char *url,char *host,char *path,int *port){
    /* Skip an optional "scheme://" prefix. */
    const char *authority = strstr(url,"//");
    authority = authority!=NULL ? authority+2 : url;

    /* The host[:port] part ends at the first '/' or at whitespace. */
    size_t authority_len = strcspn(authority,"/ \t\r\n");
    const char *rest = authority+authority_len;

    const char *colon = memchr(authority,':',authority_len);
    size_t host_len = colon!=NULL ? (size_t)(colon-authority) : authority_len;
    memcpy(host,authority,host_len);
    host[host_len] = '\0';

    *port = 80;
    if(colon!=NULL)
    {
        char *digits_end;
        long value = strtol(colon+1,&digits_end,10);
        /* Accept the port only if digits fill the whole span up to the path. */
        if(digits_end!=colon+1 && digits_end==rest && value>0 && value<=65535)
        {
            *port = (int)value;
        }
    }

    if(*rest=='/')
    {
        size_t path_len = strcspn(rest," \t\r\n");
        memcpy(path,rest,path_len);
        path[path_len] = '\0';
    }
    else
    {
        /* No path in the URL: request the root document. */
        strcpy(path,"/");
    }
}

/* Return nonzero when line starts with the header name key followed by ':'
 * (case-insensitive), so that "Hostname-X:" is not taken for "Host:". */
static int header_has_key(const char *line,const char *key)
{
    size_t key_len = strlen(key);
    return strncasecmp(line,key,key_len)==0 && line[key_len]==':';
}

/*
 * Build the request that the proxy sends to the origin server.
 *
 *   http_header  output buffer; must hold at least MAXLINE bytes.
 *   hostname     host used for the Host header when the client sent none.
 *   path         path placed in the "GET <path> HTTP/1.0" request line.
 *   client_rio   buffered reader positioned just after the client's request
 *                line; the remaining client headers are consumed from it.
 *
 * The result is: request line, Host header (the client's own if present),
 * the proxy's fixed Connection / Proxy-Connection / User-Agent headers, every
 * other client header unchanged, and the terminating blank line. The client's
 * Connection, Proxy-Connection and User-Agent headers are dropped so they do
 * not duplicate the fixed ones. Client headers that would not fit in MAXLINE
 * bytes are dropped whole, so the output is never cut in the middle of a line.
 */
void build_headr(char *http_header,char *hostname,char *path,rio_t *client_rio)
{
    char buf[MAXLINE],request_hdr[MAXLINE],other_hdr[MAXLINE],host_hdr[MAXLINE];
    const size_t cap = (size_t)MAXLINE;
    size_t other_len = 0;
    size_t fixed_len,budget,keep;

    /* Both accumulators start empty; they are inspected with strlen() later. */
    host_hdr[0] = '\0';
    other_hdr[0] = '\0';

    /* Request line. */
    snprintf(request_hdr,sizeof(request_hdr),requestlint_hdr_format,path);

    /* Read the remaining client headers and sort them.
     * NOTE(review): the capitalized csapp wrapper is believed to terminate
     * the whole process on a read error -- confirm, and consider the
     * lowercase rio_readlineb so one bad client cannot stop the proxy. */
    while(Rio_readlineb(client_rio,buf,MAXLINE)>0)
    {
        size_t line_len;

        if(strcmp(buf,endof_hdr)==0) break;/* blank line: end of headers */

        if(header_has_key(buf,host_key))/*Host:*/
        {
            /* buf and host_hdr have the same capacity, so this always fits. */
            strcpy(host_hdr,buf);
            continue;
        }

        /* These three are replaced by the proxy's own fixed headers. */
        if(header_has_key(buf,connection_key)
                ||header_has_key(buf,proxy_connection_key)
                ||header_has_key(buf,user_agent_key))
        {
            continue;
        }

        /* Forward everything else, as long as it still fits. */
        line_len = strlen(buf);
        if(other_len+line_len<sizeof(other_hdr))
        {
            memcpy(other_hdr+other_len,buf,line_len+1);
            other_len += line_len;
        }
    }
    if(strlen(host_hdr)==0)
    {
        snprintf(host_hdr,sizeof(host_hdr),host_hdr_format,hostname);
    }

    /* Work out how much room the mandatory parts leave for forwarded
     * headers, then keep only the whole lines that fit. */
    fixed_len = strlen(request_hdr)+strlen(host_hdr)+strlen(conn_hdr)
              + strlen(prox_hdr)+strlen(user_agent_hdr)+strlen(endof_hdr);
    budget = fixed_len<cap-1 ? cap-1-fixed_len : 0;
    keep = 0;
    while(other_hdr[keep]!='\0')
    {
        const char *eol = strchr(other_hdr+keep,'\n');
        size_t next = eol!=NULL ? (size_t)(eol-other_hdr)+1 : other_len;
        if(next>budget) break;
        keep = next;
    }
    other_hdr[keep] = '\0';

    snprintf(http_header,cap,"%s%s%s%s%s%s%s",
            request_hdr,
            host_hdr,
            conn_hdr,
            prox_hdr,
            user_agent_hdr,
            other_hdr,
            endof_hdr);

}

简易代理服务器-学习日志_网络编程|并发编程实验

猜你喜欢