// Class for managing the server's interaction with a client. #include #include #include #include #include #include #include #include #include #include #include #include "Client.h" #include "http_server.h" using namespace std; extern unsigned int chunkSize; extern bool verbose; // Sends part of the requested file to the client. // Assumes that the file handle is already open. // Sets "sending" to false when it's done sending. // // Returns: // 1 if the client should be removed from the list of clients // due to a socket error or non-persistent connection // 0 if client should stay in list int Client::sendFilePart() { assert(sending); assert(sendFile.is_open()); if (unsentBufferAmt == 0) { // Nothing buffered, gather some more. if (!fillSendBuffer()) { // No more data available. sending = false; sendFile.close(); resolvedPath.clear(); return ! persistent; } } // unsentBufferAmt gives us the amount of data remaining in the // buffer. From that, compute the starting point of what we // want to send. char *end_of_buffer = sendBuffer + validBufferAmt; char *data_to_send = end_of_buffer - unsentBufferAmt; ssize_t result = send(socket, data_to_send, unsentBufferAmt, 0); if (result <= 0) { if (errno == EWOULDBLOCK) { // Send could not complete because the kernel's // sender-side TCP buffer is full and there's // no room to copy in our bytes. if (verbose) cout << "send returned EWOULDBLOCK" << endl; return 0; } char *error_str = strerror(errno); cout << "ERROR -- Incommunicable Error: send() failed with " << error_str << endl; return 1; } // We successfully sent (well, gave to TCP to send) "result" bytes. unsentBufferAmt -= result; return 0; } // Takes data from the file we're reading and puts it in the sendBuffer, // adding the chunk size (if needed) and the CRLFs to the sendBuffer as well. // Updates chunkBytesRemaining, validBufferAmt, unsentBufferAmt // // Returns: // 1 if able to put data in the sendBuffer // 0 if there is nothing left to send int Client::fillSendBuffer() { assert(unsentBufferAmt == 0); assert(sendFile.is_open()); unsigned int remaining = fileSize - readSoFar; if (remaining == 0) { validBufferAmt = 0; unsentBufferAmt = validBufferAmt; // Do a read from the file to trigger EOF. sendFile.read(sendBuffer, 1); // Should have read 0 bytes, and the file should now // be completely read from. assert(sendFile.gcount() == 0); assert(sendFile.eof()); return 0; } if (chunkSize == 0) { // Don't have to worry about chunking. sendFile.read(sendBuffer, sendSize); unsigned int howmuchread = sendFile.gcount(); readSoFar += howmuchread; // Update counters. validBufferAmt = howmuchread; unsentBufferAmt = validBufferAmt; return 1; } if (chunkBytesRemaining > 0) { // Continue the current chunk. // Reserve 7 bytes for the final chunk terminator in case // we read the end of the file. unsigned int readamt = min(sendSize - 7, chunkBytesRemaining); sendFile.read(sendBuffer, readamt); unsigned int howmuchread = sendFile.gcount(); readSoFar += howmuchread; if (readSoFar == fileSize) { // End of the file. chunkBytesRemaining -= howmuchread; assert(chunkBytesRemaining == 0); // Append CRLF 0 CRLF CRLF (final chunk terminator). sendBuffer[howmuchread++] = '\r'; sendBuffer[howmuchread++] = '\n'; sendBuffer[howmuchread++] = '0'; sendBuffer[howmuchread++] = '\r'; sendBuffer[howmuchread++] = '\n'; sendBuffer[howmuchread++] = '\r'; sendBuffer[howmuchread++] = '\n'; if (verbose) cout << "filling sendBuffer with last chunk" << endl; } else if (howmuchread == chunkBytesRemaining) { // End of this chunk. chunkBytesRemaining -= howmuchread; // Append CRLF (terminator for one chunk). sendBuffer[howmuchread++] = '\r'; sendBuffer[howmuchread++] = '\n'; } else // Still working on the chunk. chunkBytesRemaining -= howmuchread; validBufferAmt = howmuchread; unsentBufferAmt = validBufferAmt; assert(chunkBytesRemaining >= 0); } else { // Start a new chunk. chunkBytesRemaining = min(chunkSize, remaining); // Create the chunk size hexadecimal string. sprintf(sendBuffer, "%x\r\n", chunkBytesRemaining); // That's all we'll send for now. We'll let a subsequent // call to this function do the rest of the work. unsigned int offset = strlen(sendBuffer); validBufferAmt = offset; unsentBufferAmt = validBufferAmt; assert(chunkBytesRemaining > 0); } return 1; } // Search for 'GET ' and remove it from the buffer. // Return 0 for no match, 1 for success. int Client::chompMethod() { string::size_type pos = completeRequest.find("GET "); if (pos == string::npos) { response = "HTTP/1.0 400 Bad Request: ERROR -- Invalid Method token: "; appendInvalidToken(); return 0; } // Erase the 'GET' from buffer. completeRequest.erase(0, 4); return 1; } // Search for the URI, remove it from the buffer. // Return 0 if we were not able to extract the URI, 1 if we got the URI. int Client::chompRequestURI() { // +Space Request-URI // Space = SP | TAB // Request-URI = Absolute-Path // Absolute-Path = "/" *FileNameChar // FileNameChar = ALPHA | DIGIT | "." | "-" | "_" | "/" // Starting at the beginning of the string, // search for optional whitespace (space or horizontal tab); // followed by a forward slash; // followed by 0 or more alphabetic, digits, . - _ / const char *pattern = "^[ \t]*(/[[:alpha:][:digit:]._/-]*)"; regex_t re; // Build in "re" a pattern matcher for the regular expression. if (regcomp(&re, pattern, REG_EXTENDED) != 0) { response = "HTTP/1.0 400 Bad Request -- Error: regex failure\r\n\r\n"; return 0; } // Now use the matcher against the buffer. const int nmatches = 2; regmatch_t matches[nmatches]; int ret = regexec(&re, completeRequest.c_str(), nmatches, matches, 0); regfree(&re); if (ret == REG_NOMATCH) { response = "HTTP/1.0 400 Bad Request: ERROR -- Invalid Request-URI token: "; appendInvalidToken(); return 0; } else if (ret != 0) { response = "HTTP/1.0 400 Bad Request -- Error: regex failure\r\n\r\n"; return 0; } // Figure out which part of buffer matched the regular expression. unsigned int len = matches[1].rm_eo - matches[1].rm_so; requestedPath = completeRequest.substr(matches[1].rm_so, len); // Delete the part that matched, as we've now consumed it. completeRequest.erase(0, matches[1].rm_eo); // Return indication of successful match. return 1; } // Search for HTTP version, remove it from the buffer passed in. // Return 0 for invalid version, 1 for successful match. int Client::chompHTTPVersion() { // +Space HTTP-Version // HTTP-Version = "HTTP" "/" +DIGIT "." +DIGIT // Search for start of string, 1 or more whitespace; // followed by 'HTTP/' ; // followed by 1 or more digits ; // followed by a . ; // followed by 1 or more digits const char *pattern = "^[ \t]+HTTP/[0-9]+\\.[0-9]+"; regex_t re; if (regcomp(&re, pattern, REG_EXTENDED) != 0) { response = "HTTP/1.0 400 Bad Request -- Error: regex failure\r\n\r\n"; return 0; } regmatch_t matches[1]; int ret = regexec(&re, completeRequest.c_str(), 1, matches, 0); regfree(&re); if (ret == REG_NOMATCH) { response = "HTTP/1.0 400 Bad Request: ERROR -- Invalid HTTP-Version token: "; appendInvalidToken(); return 0; } else if (ret != 0) { response = "HTTP/1.0 400 Bad Request -- Error: regex failure\r\n\r\n"; return 0; } // (Note, we could here check whether the version is one that // we find acceptable.) // Remove the match from buffer. completeRequest.erase(0, matches[0].rm_eo); return 1; } // Search for and remove the CRLF at the end. // Return 0 if the match failed, 1 for succcessful match. int Client::chompCRLF() { // *Space CRLF // Starting at the beginning of string, search for 0 or more whitespace // followed by CRLF. const char *pattern = "^[ \t]*\r\n"; regex_t re; if (regcomp(&re, pattern, REG_EXTENDED) != 0) { response = "HTTP/1.0 400 Bad Request -- Error: regex failure\r\n\r\n"; return 0; } const int nmatches = 1; regmatch_t matches[nmatches]; int ret = regexec(&re, completeRequest.c_str(), nmatches, matches, 0); regfree(&re); if (ret == REG_NOMATCH) { response = "HTTP/1.0 400 ERROR -- Spurious token before CRLF: "; // Append up to the CRLF string::size_type pos = completeRequest.find("\r\n"); response.append(completeRequest.substr(0, pos)); response.append("\r\n\r\n"); return 0; } else if (ret != 0) { response = "HTTP/1.0 400 Bad Request -- Error: regex failure\r\n\r\n"; return 0; } // Delete the matching part from the buffer. completeRequest.erase(0, matches[0].rm_eo); return 1; } // This is a helper routine that's called when parsing has failed. // It appends the apparent invalid token, which is either the first // word in completeRequest or its entirety. void Client::appendInvalidToken() { // Attempt to figure out what the invalid token is. string::size_type pos = completeRequest.find(" "); if (pos != string::npos) // Append the first word. response.append(completeRequest.substr(0, pos)); else { // Append up to the CRLF. string::size_type pos = completeRequest.find("\r\n"); response.append(completeRequest.substr(0, pos)); } response.append("\r\n\r\n"); } // Sets the persistent flag. // Looks for Connection: Keep-Alive in completeRequest and removes it if there. void Client::checkPersistent() { // Each new request must include the keep-alive line to have a // continued persistent connection. persistent = false; // Look for the Connection: Keep-Alive. // Allow a case-insensitive match. // // First try matching at the beginning of our complete request; // then try matching it after a CRLF, to ensure that it's not embedded // in some other header line. const char *pattern = "^[ \t]*connection:[ \t]*keep-alive[ \t]*\r\n"; const int nmatches = 2; regmatch_t matches[nmatches]; regex_t re; int ret; if (regcomp(&re, pattern, REG_EXTENDED | REG_ICASE) != 0) { // Goto's are generally frowned upon, but they do have // their uses when a return or break statement won't suit. goto SECOND_REGEX; } ret = regexec(&re, completeRequest.c_str(), 1, matches, 0); regfree(&re); if (ret == REG_NOMATCH) ; // no match is ok else if (ret != 0) { // Some error with the regexec function, which we will // ignore since headers are optional. } else { persistent = true; // Remove the line from buffer. completeRequest.erase(0, matches[0].rm_eo); return; } SECOND_REGEX: // Now try matching after a CRLF. pattern = "\r\n([ \t]*connection:[ \t]*keep-alive[ \t]*\r\n)"; if (regcomp(&re, pattern, REG_EXTENDED | REG_ICASE) != 0) return; ret = regexec(&re, completeRequest.c_str(), nmatches, matches, 0); regfree(&re); if (ret == REG_NOMATCH) ; // no match is ok else if (ret != 0) ; // ignore error since headers are optional else { persistent = true; completeRequest.erase(matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so); } } // Returns a "200 OK" string with correct keep-alive, content-length, // and/or chunked header lines, followed by a blank line. string Client::successfulResponse() { string ret = "HTTP/1.0 200 OK\r\nContent-Type: "; if (match(resolvedPath.c_str(), "\\.txt$")) ret.append("text/plain"); else if (match(resolvedPath.c_str(), "\\.html$")) ret.append("text/html"); else if (match(resolvedPath.c_str(), "\\.gif$")) ret.append("image/gif"); else if (match(resolvedPath.c_str(), "\\.jpg$")) ret.append("image/jpeg"); else assert(false); if (chunkSize == 0) { ret.append("\r\nContent-Length: "); char buf[64]; sprintf(buf, "%d", fileSize); ret.append(buf).append("\r\n"); } else ret.append("\r\nTransfer-Encoding: chunked\r\n"); ret.append(endOfResponse()); return ret; } // Handles one request from the client, consuming completeRequest. // // Returns: // 0 if the socket should be closed because of an invalid request // 1 if the request was parsed ok int Client::processRequest() { int ret = chompMethod() && chompRequestURI() && chompHTTPVersion() && chompCRLF(); if (!ret) { // Close the socket (even if the client wanted persistence), // because we cannot parse the request (and therefore cannot // tell if persistence was requested). sendAll(socket, response.c_str(), response.length()); if (verbose) cout << "Sending response >" << response << "<" << endl; return 0; } checkPersistent(); // Make sure requested file/dir is in server's working directory. if (!inWorkingDirectory()) { response = "HTTP/1.0 403 Forbidden: "; response.append(requestedPath); response.append("\r\n"); response.append(endOfResponse()); sendAll(socket, response.c_str(), response.length()); if (verbose) cout << "Sending response >" << response << "<" << endl; return 1; } // Now check if the file exists. if (!resolvePath()) { sendAll(socket, response.c_str(), response.length()); if (verbose) cout << "Sending response >" << response << "<" << endl; return 1; } if (verbose) cout << "resolved path: " << resolvedPath << endl; // Open the file and set up to send it. sendFile.open(resolvedPath.c_str(), ios::in | ios::binary); if (sendFile.fail()) { // Open will fail if we don't have permissions to open it. response = "HTTP/1.0 403 Forbidden: "; response.append(requestedPath).append("\r\n"); response.append(endOfResponse()); sendAll(socket, response.c_str(), response.length()); if (verbose) cout << "Sending response >" << response << "<" << endl; return 1; } sending = true; // Initialize counters. validBufferAmt = unsentBufferAmt = chunkBytesRemaining = 0; readSoFar = 0; // Set the file size. struct stat sbuf; int res = stat(resolvedPath.c_str(), &sbuf); assert(res == 0); fileSize = sbuf.st_size; // Send the status line and headers. response = successfulResponse(); sendAll(socket, response.c_str(), response.length()); if (verbose) cout << "Setting sending=true and Sending response >" << response << "<" << endl; return 1; } // Returns false if resolvedPath does not end with one of the supported // file types. bool Client::isSupportedType() { return match(resolvedPath.c_str(), "\\.txt$") || match(resolvedPath.c_str(), "\\.html$") || match(resolvedPath.c_str(), "\\.gif$") || match(resolvedPath.c_str(), "\\.jpg$"); } // Generates resolvedPath based on the rules for resolving requested names. // // Returns: // true if we should send the file referred to by resolvedPath // false if it doesn't exist or is not a supported type (in which // case sets response appropriately) bool Client::resolvePath() { // Add a '.' to the beginning to restrict search scope to // local directory (unless client escapes using "..", but inWorkingDirectory // should catch that case before resolvePath is called). string relativePath = "."; relativePath.append(requestedPath); // Check if it's a file. bool isFile = false, isDir = false, exists = false, trailingSlash; struct stat sbuf; if (stat(relativePath.c_str(), &sbuf) == 0) { exists = true; isFile = sbuf.st_mode & S_IFREG; isDir = sbuf.st_mode & S_IFDIR; } bool indexExists = false; string withIndex = relativePath; // Check if it's a directory and index.html exists in the directory. if (exists && isDir) { // Ensure we have a '/' as the last charcter. if (withIndex.rfind('/') != (withIndex.length() - 1)) withIndex.append("/"); withIndex.append("index.html"); if (stat(withIndex.c_str(), &sbuf) == 0) indexExists = sbuf.st_mode & S_IFREG; } trailingSlash = relativePath.rfind('/') == (relativePath.length() - 1); if (trailingSlash && indexExists) resolvedPath = withIndex; else if (exists && isFile) { resolvedPath = relativePath; if (!isSupportedType()) { response = "HTTP/1.0 501 Not Implemented: "; response.append(requestedPath).append("\r\n"); response.append(endOfResponse()); return false; } } else if (exists && indexExists) resolvedPath = withIndex; else if (exists && isDir) { // Construct a temporary file to capture the output of "ls". char buf[128 + FILENAME_MAX]; sprintf(buf, "/tmp/dirlisting.%d.%d.txt", socket, (int)time(NULL)); resolvedPath = buf; // Run ls and have it write to our temporary file. string command = "ls "; command.append(relativePath).append(" > "); command.append(resolvedPath); system(command.c_str()); } else { response = "HTTP/1.0 404 Not Found: "; response.append(requestedPath).append("\r\n"); response.append(endOfResponse()); return false; } return true; } // Generate the remainder of a response header. This has a keep-alive // header if persistent, and ends with blank line in any case. string Client::endOfResponse() { string ret = ""; if (persistent) ret = "Connection: Keep-Alive\r\n"; ret.append("\r\n"); return ret; } // Ensure that the request is not for a file/directory above the server's // working directory (make sure the client can't escape!). // // Note, as implemented this code will fail if there are subdirectories // containing 3 or more dots, i.e., requests for items like "/.../foo". // // Returns: // true if request resolves in the working directory // false otherwise bool Client::inWorkingDirectory() { // Algorithm: // - skip over single dots '/.' // - names like '/foo' add one level of depth // - parent directory '/..' decreases depth by one // - if at any point depth < 0 then we terminate and return false // - return true if we reach the end of the path and depth >= 0 // // Note that the algorithm isn't perfect (but fails conservatively). // It will return false on a request like "/../foo/somefile" even // if the working directory is "foo" and therefore the request // resolves to "./somefile". string path = requestedPath; int depth = 0; string::size_type pos; while (path.length() > 0) { // Match '/..' followed by a '/' or end of string. if (match(path.c_str(), "^/\\.\\.(/|$)")) { if (--depth < 0) return false; // remove /.. path.erase(0, 3); } // Match '/.' followed by '/' or end of string. else if (match(path.c_str(), "^/\\.(/|$)")) // remove /. path.erase(0, 2); else { // Remove everything up to next '/'. // Erase the whole string if no more '/' in string. pos = path.find("/", 1); if (pos != 1) // There should be at least one character // before the 2nd '/', i.e., '//' should // not increase our depth. ++depth; path.erase(0, pos); } } return true; }