/*
 * Source: https://github.com/mozilla/gecko-dev
 * Raw File
 * Tip revision: 0b024105bfcb2304529100c159eda55374dceec3 authored by Nils Ohlmeier [:drno] on 04 January 2021, 19:40:18 UTC
 * Bug 1683964: Harden the handling of outgoing streams. r=ng, a=RyanVM
 * Tip revision: 0b02410
 * File: httpget.c
 */
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */


/*
 * Author: Wan-Teh Chang
 *
 * Given an HTTP URL, httpget uses the GET method to fetch the file.
 * The fetched file is written to stdout by default, or can be
 * saved in an output file.
 *
 * This is a single-threaded program.
 */

#include "prio.h"
#include "prnetdb.h"
#include "prlog.h"
#include "prerror.h"
#include "prprf.h"
#include "prinit.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>  /* for atoi */

/* Size of the stack buffer FetchFile copies through, and the largest
 * single read FastFetchFile issues. */
#define FCOPY_BUFFER_SIZE (16 * 1024)
/* Size of inputBuf, the look-ahead buffer that ReadLine fills. */
#define INPUT_BUFFER_SIZE 1024
/* Size of the buffer main() uses to hold one response line. */
#define LINE_SIZE 512
/* Sizes of the host, port and path buffers main() hands to ParseURL. */
#define HOST_SIZE 256
#define PORT_SIZE 32
#define PATH_SIZE 512

/*
 * A buffer for storing the excess input data for ReadLine.
 * The data in the buffer starts from (including) the element pointed to
 * by inputHead, and ends just before (not including) the element pointed
 * to by inputTail.  The buffer is empty if inputHead == inputTail.
 *
 * ReadLine refills the buffer with PR_Read once it is empty;
 * DrainInputBuffer hands out whatever is still buffered.
 */

static char inputBuf[INPUT_BUFFER_SIZE];
/*
 * inputBufEnd points just past the end of inputBuf
 */
static char *inputBufEnd = inputBuf + sizeof(inputBuf);
/* First unconsumed byte in inputBuf. */
static char *inputHead = inputBuf;
/* One past the last valid byte in inputBuf. */
static char *inputTail = inputBuf;

/* Set to PR_TRUE by ReadLine once PR_Read returns 0; never reset. */
static PRBool endOfStream = PR_FALSE;

/*
 * ReadLine --
 *
 * Copy the next line of text from fd into buf.  A line ends at LF; a CR
 * stored immediately before that LF is dropped, so a complete line always
 * ends in a single '\n'.  buf is null-terminated on every return path.
 * At most bufSize - 1 bytes are stored; a longer line is returned in
 * pieces over successive calls.  Bytes read from fd beyond the end of
 * the line stay in inputBuf for the next ReadLine or DrainInputBuffer
 * call.
 *
 * Returns the number of bytes stored in buf, not counting the null byte.
 * 0 means end of stream.  Returns -1 if PR_Read fails.
 */

PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
{
    char *out = buf;
    char *outLimit = buf + bufSize - 1;  /* slot reserved for the '\0' */
    char *lastCR = NULL;                 /* where the latest '\r' was stored */
    PRBool sawNewline = PR_FALSE;
    PRInt32 got;

    for (;;) {
        PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
                  && inputTail <= inputBufEnd);

        /* Consume buffered bytes until the line ends, the buffer runs
         * dry, or buf has room for nothing but the terminator. */
        while (!sawNewline && inputHead != inputTail && out < outLimit) {
            char c = *inputHead++;

            if (c == '\r') {
                lastCR = out;
            } else if (c == '\n') {
                sawNewline = PR_TRUE;
                if (lastCR == out - 1) {
                    /* Step back so the '\n' overwrites the '\r'. */
                    out--;
                }
            }
            *out++ = c;
        }

        if (sawNewline || out == outLimit || endOfStream) {
            *out = '\0';
            return out - buf;
        }

        /*
         * Everything buffered has been consumed; fetch more from fd.
         */
        PR_ASSERT(inputHead == inputTail);

        got = PR_Read(fd, inputBuf, sizeof(inputBuf));
        if (got == -1) {
            *out = '\0';
            return -1;
        }
        if (got == 0) {
            /* Remember EOF so later calls do not read again. */
            endOfStream = PR_TRUE;
            *out = '\0';
            return out - buf;
        }
        inputHead = inputBuf;
        inputTail = inputBuf + got;
    }
}

/*
 * DrainInputBuffer --
 *
 * Move up to bufSize bytes of the data still held in inputBuf into buf
 * and advance inputHead past them.
 *
 * Returns the number of bytes copied.  When inputBuf is empty, returns
 * -1 if end of stream has already been seen and 0 otherwise.  A bufSize
 * of 0 also yields 0.
 */
PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
{
    PRInt32 nBytes = inputTail - inputHead;

    if (nBytes == 0) {
        return endOfStream ? -1 : 0;
    }
    /*
     * Compare in unsigned arithmetic.  Casting bufSize to PRInt32 makes
     * sizes above 2^31 - 1 negative, which would select the huge bufSize
     * as the copy length and overrun both buffers in memcpy.
     */
    if (bufSize < (PRUint32) nBytes) {
        nBytes = (PRInt32) bufSize;
    }
    memcpy(buf, inputHead, nBytes);
    inputHead += nBytes;
    return nBytes;
}

/*
 * FetchFile --
 *
 * Copy the rest of the response to out: first whatever is still held in
 * the input buffer (via DrainInputBuffer), then everything PR_Read
 * delivers from in until it returns 0.
 *
 * Returns PR_SUCCESS when the stream ends cleanly, PR_FAILURE (after
 * printing a message to stderr) when a write is short or a read fails.
 */
PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
{
    char chunk[FCOPY_BUFFER_SIZE];
    PRInt32 got;

    /* Phase 1: flush the bytes that were read ahead of the body. */
    for (;;) {
        got = DrainInputBuffer(chunk, sizeof(chunk));
        if (got < 0) {
            /* Input buffer is empty and end of stream */
            return PR_SUCCESS;
        }
        if (got == 0) {
            break;
        }
        if (PR_Write(out, chunk, got) != got) {
            fprintf(stderr, "httpget: cannot write to file\n");
            return PR_FAILURE;
        }
    }

    /* Phase 2: stream the remainder straight from the socket. */
    for (;;) {
        got = PR_Read(in, chunk, sizeof(chunk));
        if (got <= 0) {
            break;
        }
        if (PR_Write(out, chunk, got) != got) {
            fprintf(stderr, "httpget: cannot write to file\n");
            return PR_FAILURE;
        }
    }
    if (got < 0) {
        fprintf(stderr, "httpget: cannot read from socket\n");
        return PR_FAILURE;
    }
    return PR_SUCCESS;
}

/*
 * FastFetchFile --
 *
 * Copy up to size bytes of the response into out through a memory
 * mapping of out: first the bytes still held in the input buffer (via
 * DrainInputBuffer), then bytes read from in, at most FCOPY_BUFFER_SIZE
 * per PR_Read call.  Copying stops once size bytes have been stored or
 * the stream ends.
 *
 * The mapping and the file map are released on every path once they have
 * been created.
 *
 * Returns PR_SUCCESS on success.  Returns PR_FAILURE (after printing a
 * message to stderr) if the file cannot be mapped, a read fails, or the
 * mapping cannot be released.
 */
PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
{
    PRInt32 nBytes;
    PRFileMap *outfMap;
    void *addr;
    char *start;
    PRUint32 rem;
    PRUint32 bytesToRead;
    PRStatus result = PR_SUCCESS;
    PRInt64 sz64;

    LL_UI2L(sz64, size);
    outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
    if (outfMap == NULL) {
        /* PR_ASSERT compiles away in release builds, so check for real. */
        fprintf(stderr, "cannot create file map: (%d, %d)\n", PR_GetError(),
                PR_GetOSError());
        return PR_FAILURE;
    }
    addr = PR_MemMap(outfMap, LL_ZERO, size);
    if (addr == NULL) {
        fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
                PR_GetOSError());

        PR_CloseFileMap(outfMap);
        return PR_FAILURE;
    }

    start = (char *) addr;
    rem = size;
    while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
        start += nBytes;
        rem -= nBytes;
    }

    /*
     * nBytes < 0 means the input buffer is empty and the stream has
     * ended, so there is nothing left to read.  Fall through to the
     * cleanup below instead of returning with the mapping still live.
     */
    if (nBytes == 0) {
        while (rem > 0) {
            bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
            nBytes = PR_Read(in, start, bytesToRead);
            if (nBytes <= 0) {
                break;
            }
            start += nBytes;
            rem -= nBytes;
        }
        if (nBytes < 0) {
            fprintf(stderr, "httpget: cannot read from socket\n");
            result = PR_FAILURE;
        }
    }

    if (PR_MemUnmap(addr, size) != PR_SUCCESS) {
        fprintf(stderr, "httpget: cannot unmap file: (%d, %d)\n",
                PR_GetError(), PR_GetOSError());
        result = PR_FAILURE;
    }
    if (PR_CloseFileMap(outfMap) != PR_SUCCESS) {
        fprintf(stderr, "httpget: cannot close file map: (%d, %d)\n",
                PR_GetError(), PR_GetOSError());
        result = PR_FAILURE;
    }
    return result;
}

/*
 * ParseURL --
 *
 * Split a URL that begins with "http" followed by "://" into host, port
 * and path strings, each null-terminated and limited to the size of its
 * buffer.
 *
 *   host: characters after "://" up to the first ':' or '/' or the end.
 *   port: characters after ':' up to the first '/' or the end; "80" when
 *         the URL has no ':' after the host.
 *   path: everything from the first '/' after host/port; "/" when the
 *         URL stops before any path.
 *
 * Returns PR_SUCCESS, or PR_FAILURE after printing a message to stderr
 * when the prefix is wrong or a component does not fit in its buffer.
 */
PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
                  char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
{
    char *src;       /* read cursor into url */
    char *out;       /* write cursor into the current component */
    char *lastSlot;  /* slot reserved for the terminating null byte */

    if (strncmp(url, "http", 4) != 0) {
        fprintf(stderr, "httpget: the protocol must be http\n");
        return PR_FAILURE;
    }
    if (strncmp(url + 4, "://", 3) != 0 || url[7] == '\0') {
        fprintf(stderr, "httpget: malformed URL: %s\n", url);
        return PR_FAILURE;
    }

    /* Host name. */
    src = url + 7;
    out = host;
    lastSlot = host + hostSize - 1;
    for (; *src != '\0' && *src != ':' && *src != '/'; src++) {
        if (out == lastSlot) {
            fprintf(stderr, "httpget: host name too long\n");
            return PR_FAILURE;
        }
        *out++ = *src;
    }
    *out = '\0';

    /* Port: explicit after ':', otherwise the default 80. */
    if (*src == ':') {
        src++;
        out = port;
        lastSlot = port + portSize - 1;
        for (; *src != '\0' && *src != '/'; src++) {
            if (out == lastSlot) {
                fprintf(stderr, "httpget: port number too long\n");
                return PR_FAILURE;
            }
            *out++ = *src;
        }
        *out = '\0';
    } else {
        PR_snprintf(port, portSize, "%d", 80);
    }

    /* URL ended before any path: request the root. */
    if (*src == '\0') {
        PR_snprintf(path, pathSize, "%s", "/");
        return PR_SUCCESS;
    }

    /* Path: the rest of the URL, starting at the '/'. */
    out = path;
    lastSlot = path + pathSize - 1;
    for (; *src != '\0'; src++) {
        if (out == lastSlot) {
            fprintf(stderr, "httpget: file pathname too long\n");
            return PR_FAILURE;
        }
        *out++ = *src;
    }
    *out = '\0';
    return PR_SUCCESS;
}

/* Write the three accepted command-line forms to stderr. */
void PrintUsage(void) {
    static const char usage[] =
        "usage: httpget url\n"
        "       httpget -o outputfile url\n"
        "       httpget url -o outputfile\n";

    fputs(usage, stderr);
}

int main(int argc, char **argv)
{
    PRHostEnt hostentry;
    char buf[PR_NETDB_BUF_SIZE];
    PRNetAddr addr;
    PRFileDesc *socket = NULL, *file = NULL;
    PRIntn cmdSize;
    char host[HOST_SIZE];
    char port[PORT_SIZE];
    char path[PATH_SIZE];
    char line[LINE_SIZE];
    int exitStatus = 0;
    PRBool endOfHeader = PR_FALSE;
    char *url;
    char *fileName = NULL;
    PRUint32 fileSize;

    if (argc != 2 && argc != 4) {
        PrintUsage();
        exit(1);
    }

    if (argc == 2) {
        /*
         * case 1: httpget url
         */
        url = argv[1];
    } else {
        if (strcmp(argv[1], "-o") == 0) {
            /*
             * case 2: httpget -o outputfile url
             */
            fileName = argv[2];
            url = argv[3];
        } else {
            /*
             * case 3: httpget url -o outputfile
             */
            url = argv[1];
            if (strcmp(argv[2], "-o") != 0) {
                PrintUsage();
                exit(1);
            }
            fileName = argv[3];
        }
    }

    if (ParseURL(url, host, sizeof(host), port, sizeof(port),
                 path, sizeof(path)) == PR_FAILURE) {
        exit(1);
    }

    if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
        == PR_FAILURE) {
        fprintf(stderr, "httpget: unknown host name: %s\n", host);
        exit(1);
    }

    addr.inet.family = PR_AF_INET;
    addr.inet.port = PR_htons((short) atoi(port));
    addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);

    socket = PR_NewTCPSocket();
    if (socket == NULL) {
        fprintf(stderr, "httpget: cannot create new tcp socket\n");
        exit(1);
    }

    if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
        fprintf(stderr, "httpget: cannot connect to http server\n");
        exitStatus = 1;
        goto done;
    }

    if (fileName == NULL) {
        file = PR_STDOUT;
    } else {
        file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
                       00777);
        if (file == NULL) {
            fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
                    fileName, PR_GetError(), PR_GetOSError());
            exitStatus = 1;
            goto done;
        }
    }

    cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
    PR_ASSERT(cmdSize == (PRIntn) strlen("GET  HTTP/1.0\r\n\r\n")
              + (PRIntn) strlen(path));
    if (PR_Write(socket, buf, cmdSize) != cmdSize) {
        fprintf(stderr, "httpget: cannot write to http server\n");
        exitStatus = 1;
        goto done;
    }

    if (ReadLine(socket, line, sizeof(line)) <= 0) {
        fprintf(stderr, "httpget: cannot read line from http server\n");
        exitStatus = 1;
        goto done;
    }

    /* HTTP response: 200 == OK */
    if (strstr(line, "200") == NULL) {
        fprintf(stderr, "httpget: %s\n", line);
        exitStatus = 1;
        goto done;
    }

    while (ReadLine(socket, line, sizeof(line)) > 0) {
        if (line[0] == '\n') {
            endOfHeader = PR_TRUE;
            break;
        }
        if (strncmp(line, "Content-Length", 14) == 0
            || strncmp(line, "Content-length", 14) == 0) {
            char *p = line + 14;

            while (*p == ' ' || *p == '\t') {
                p++;
            }
            if (*p != ':') {
                continue;
            }
            p++;
            while (*p == ' ' || *p == '\t') {
                p++;
            }
            fileSize = 0;
            while ('0' <= *p && *p <= '9') {
                fileSize = 10 * fileSize + (*p - '0');
                p++;
            }
        }
    }
    if (endOfHeader == PR_FALSE) {
        fprintf(stderr, "httpget: cannot read line from http server\n");
        exitStatus = 1;
        goto done;
    }

    if (fileName == NULL || fileSize == 0) {
        FetchFile(socket, file);
    } else {
        FastFetchFile(socket, file, fileSize);
    }

done:
    if (socket) {
        PR_Close(socket);
    }
    if (file) {
        PR_Close(file);
    }
    PR_Cleanup();
    return exitStatus;
}
/* back to top */