Raw File
hub_http_server.coffee
##############################################################################
#
#    CoCalc: Collaborative Calculation in the Cloud
#
#    Copyright (C) 2016, Sagemath Inc.
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

###
The Hub's HTTP Server
###

fs           = require('fs')
path_module  = require('path')
Cookies      = require('cookies')
util         = require('util')
ms           = require('ms')

async        = require('async')
cookieParser = require('cookie-parser')
body_parser  = require('body-parser')
express      = require('express')
formidable   = require('formidable')
http_proxy   = require('http-proxy')
http         = require('http')
winston      = require('winston')

winston      = require('./winston-metrics').get_logger('hub_http_server')

misc         = require('smc-util/misc')
{defaults, required} = misc

misc_node    = require('smc-util-node/misc_node')
hub_register = require('./hub_register')
auth         = require('./auth')
access       = require('./access')
hub_projects = require('./projects')
MetricsRecorder  = require('./metrics-recorder')


{http_message_api_v1} = require('./api/handler')

# Rendering stripe invoice server side to PDF in memory
{stripe_render_invoice} = require('./stripe/invoice')

SMC_ROOT    = process.env.SMC_ROOT
STATIC_PATH = path_module.join(SMC_ROOT, 'static')

# Create the hub's express application, its router and the node HTTP server
# wrapping it, and register all of the hub's HTTP routes on the router.
#
# opts:
#   base_url       - required; when non-empty the router is mounted under this prefix,
#                    and it is also prepended to redirect targets.
#   dev            - if true, additional development-only proxies/servers are initialized.
#   database       - required; database object used by server settings and by the routes.
#   compute_server - required; handed to the API handlers (and to the dev proxies).
#   cookie_options - optional; passed to the `Cookies` constructor in the /cookies route.
#
# Returns an object {http_server, express_router}.
exports.init_express_http_server = (opts) ->
    opts = defaults opts,
        base_url       : required
        dev            : false       # if true, serve additional dev stuff, e.g., a proxyserver.
        database       : required
        compute_server : required
        cookie_options : undefined
    winston.debug("initializing express http server")
    winston.debug("MATHJAX_URL = ", misc_node.MATHJAX_URL)

    # Server settings object; its `.all` and `.pub` properties are read by the
    # /registration and /customize routes.
    server_settings = require('./server-settings')(opts.database)

    # Create an express application
    router = express.Router()
    app    = express()
    http_server = http.createServer(app)
    # Populates req.cookies, which several handlers registered on the router read.
    app.use(cookieParser())

    # Enable compression, as
    # suggested by http://expressjs.com/en/advanced/best-practice-performance.html#use-gzip-compression
    # NOTE "Express runs everything in order" -- https://github.com/expressjs/compression/issues/35#issuecomment-77076170
    compression = require('compression')
    app.use(compression())

    # Very large limit, since can be used to send, e.g., large single patches, and
    # the default is only 100kb!  https://github.com/expressjs/body-parser#limit-2
    router.use(body_parser.json({limit: '3mb'}))
    router.use(body_parser.urlencoded({extended: true, limit: '3mb'}))

    # initialize metrics
    response_time_histogram = MetricsRecorder.new_histogram('http_histogram', 'http server'
                                  buckets : [0.01, 0.1, 1, 2, 10, 20]
                                  labels: ['path', 'method', 'code']
                              )
    # response time metrics: start a timer when a request reaches the router and
    # record it (labelled by a coarse path, the HTTP method and the status code)
    # when the response is ended.
    router.use (req, res, next) ->
        res_finished_h = response_time_histogram.startTimer()
        original_end = res.end
        res.end = ->
            # Finish the real response first, and remember what it returned so this
            # wrapper stays a transparent replacement for res.end (node returns the
            # response object from end(), which callers may chain on).
            result      = original_end.apply(res, arguments)
            path_split  = req.path.split('/')
            # for API paths, we want to have data for each endpoint
            path_tail   = path_split[path_split.length-3 ..]
            is_api      = path_tail[0] == 'api' and path_tail[1] == 'v1'
            if is_api
                dir_path = path_tail.join('/')
            else
                # for regular paths, we ignore the file and keep only the first directory
                dir_path = path_module.dirname(req.path).split('/')[..1].join('/')
            #winston.debug('response timing/path_split:', path_tail, is_api, dir_path)
            res_finished_h({path:dir_path, method:req.method, code:res.statusCode})
            return result
        next()

    # save utm parameters and referrer in a (short lived) cookie or read it to fill in locals.utm
    # webapp takes care of consuming it (see misc_page.get_utm)
    router.use (req, res, next) ->
        # quickly return in the usual case
        if Object.keys(req.query).length == 0
            next()
            return
        utm = {}

        # Start from whatever an earlier request stored in the utm cookie.  This is
        # best effort: the cookie is client controlled, so a malformed value is
        # logged and ignored rather than failing the request.
        utm_cookie = req.cookies[misc.utm_cookie_name]
        if utm_cookie
            try
                # NOTE: this runs in node, where there is no `window` global -- use the
                # global decodeURIComponent directly.
                data = misc.from_json(decodeURIComponent(utm_cookie))
                if data? and typeof(data) == 'object'
                    utm = misc.merge(utm, data)
            catch err
                winston.debug("HTTP server: ignoring malformed utm cookie -- #{err}")

        for k, v of req.query
            continue if not misc.startswith(k, 'utm_')
            # The query parser can produce arrays/objects (e.g. ?utm_source[a]=b);
            # only plain string values are meaningful here.
            continue if typeof(v) != 'string'
            # untrusted input, limit the length of key and value
            k = k[4...50]
            utm[k] = v[...50] if k in misc.utm_keys

        if Object.keys(utm).length
            utm_data = encodeURIComponent(JSON.stringify(utm))
            res.cookie(misc.utm_cookie_name, utm_data, {path: '/', maxAge: ms('1 day'), httpOnly: false})
            res.locals.utm = utm

        referrer_cookie = req.cookies[misc.referrer_cookie_name]
        if referrer_cookie
            res.locals.referrer = referrer_cookie

        winston.debug("HTTP server: #{req.url} -- UTM: #{misc.to_json(res.locals.utm)}")
        next()

    app.enable('trust proxy') # see http://stackoverflow.com/questions/10849687/express-js-how-to-get-remote-client-address

    # The webpack content. all files except for unhashed .html should be cached long-term ...
    # Used as the `setHeaders` callback of express.static: (res, path) -> sets caching
    # headers on the response.
    cacheLongTerm = (res, path) ->
        if not opts.dev  # ... unless in dev mode
            timeout = ms('100 days') # in milliseconds; more than a year would be invalid
            # Cache-Control's max-age is an unquoted integer number of *seconds*,
            # whereas ms() and Date arithmetic work in milliseconds.
            res.setHeader('Cache-Control', "public, max-age=#{Math.round(timeout / 1000)}")
            res.setHeader('Expires', new Date(Date.now() + timeout).toUTCString())

    # robots.txt: keep crawlers out of project related paths (which would otherwise
    # cause many 500/404 errors), while still allowing the share server.
    router.use '/robots.txt', (req, res) ->
        robots = '''
                 User-agent: *
                 Allow: /share
                 Disallow: /projects/*
                 Disallow: /*/raw/
                 Disallow: /*/port/
                 Disallow: /haproxy
                 '''
        res.header("Content-Type", "text/plain")
        res.header('Cache-Control', 'private, no-cache, must-revalidate')
        res.write(robots)
        res.end()

    # The /static content, served with long-term caching headers
    router.use('/static', express.static(STATIC_PATH, {setHeaders: cacheLongTerm}))

    # Policies and documentation are served from subdirectories of the static path
    # with maxAge 0.
    policies_path = path_module.join(STATIC_PATH, 'policies')
    router.use('/policies', express.static(policies_path, {maxAge: 0}))
    doc_path = path_module.join(STATIC_PATH, 'doc')
    router.use('/doc', express.static(doc_path, {maxAge: 0}))

    # Landing page: visitors whose remember-me cookie has the value 'true' are
    # redirected to the app, everybody else gets index.html.
    router.get '/', (req, res) ->
        # For convenience, a simple heuristic checks for the presence of the remember_me cookie;
        # that's not a security issue b/c the hub will do the heavy lifting.
        # TODO code in comments is a heuristic looking for the remember_me cookie, while when deployed the haproxy only
        # looks for the has_remember_me value (set by the client in accounts).
        # This could be done in different ways, it's not clear what works best.
        #remember_me = req.cookies[opts.base_url + 'remember_me']
        cookie_name = auth.remember_me_cookie_name(opts.base_url)
        if req.cookies[cookie_name] == 'true' # and remember_me?.split('$').length == 4 and not req.query.signed_out?
            res.redirect("#{opts.base_url}/app")
            return
        #res.cookie(opts.base_url + 'has_remember_me', 'false', { maxAge: 60*60*1000, httpOnly: false })
        index_html = path_module.join(STATIC_PATH, 'index.html')
        res.sendFile(index_html, {maxAge: 0})

    # The single page app itself.
    router.get '/app', (req, res) ->
        #res.cookie(opts.base_url + 'has_remember_me', 'true', { maxAge: 60*60*1000, httpOnly: false })
        app_html = path_module.join(STATIC_PATH, 'app.html')
        res.sendFile(app_html, {maxAge: 0})

    # The base_url javascript, which sets the base_url for the client.
    router.get '/base_url.js', (req, res) ->
        # res.send of a string defaults to text/html; declare the body as javascript
        # so it is served with the content type matching what it is.
        res.type('application/javascript')
        res.send("window.app_base_url='#{opts.base_url}';")

    # Health check used by HAPROXY to decide whether this hub may receive traffic.
    router.get '/alive', (req, res) ->
        if hub_register.database_is_working()
            res.send('alive')
            return
        # A 404 stops haproxy from routing traffic to us until the
        # db connection starts working again.
        winston.debug("alive: answering *NO*")
        res.status(404).end()

    # Plain-text dump of the metrics recorder, or a JSON error message if the
    # recorder has not been initialized.
    router.get '/metrics', (req, res) ->
        res.header("Content-Type", "text/plain")
        res.header('Cache-Control', 'private, no-cache, no-store, must-revalidate')
        recorder = MetricsRecorder.get()
        # res.send(JSON.stringify(opts.metricsRecorder.get(), null, 2))
        body = if recorder? then recorder.metrics() else JSON.stringify(error:'Metrics recorder not initialized.')
        res.send(body)

    # /concurrent-warn -- used by kubernetes to decide whether or not to kill the container; if
    # below the warn thresh, returns number of concurrent connection; if hits warn, then
    # returns 404 error, meaning hub may be unhealthy.  Kubernetes will try a few times before
    # killing the container.  Will also return 404 if there is no working database connection.
    router.get '/concurrent-warn', (req, res) ->
        c = opts.database.concurrent()
        # The two unhealthy cases are logged separately so the log states the actual reason.
        if not hub_register.database_is_working()
            winston.debug("/concurrent-warn: not healthy, since database is not working")
            res.status(404).end()
        else if c >= opts.database._concurrent_warn
            winston.debug("/concurrent-warn: not healthy, since concurrent >= #{opts.database._concurrent_warn}")
            res.status(404).end()
        else
            res.send("#{c}")

    # Return number of concurrent connections (could be useful)
    router.get '/concurrent', (req, res) ->
        res.send("#{opts.database.concurrent()}")

    # HTTP API
    # The API key is taken from the Authorization header, either as
    #     Authorization: Bearer <api_key>
    # or as the user name of HTTP basic auth
    #     Authorization: Basic base64(<api_key>:<ignored>)
    # The event name is the last segment of the request path.
    router.post '/api/v1/*', (req, res) ->
        h = req.header('Authorization')
        if not h?
            res.status(400).send(error:'You must provide authentication via an API key.')
            return
        [type, user] = misc.split(h)
        switch type
            when "Bearer"
                api_key = user
            when "Basic"
                if not user?
                    # "Authorization: Basic" without credentials -- Buffer.from(undefined)
                    # would throw, so answer with a proper Bad Request instead.
                    res.status(400).send(error:'You must provide authentication via an API key.')
                    return
                # the user name part (before the first colon) is the api key
                api_key = Buffer.from(user, 'base64').toString().split(':')[0]
            else
                res.status(400).send(error:"Unknown authorization type '#{type}'")
                return

        http_message_api_v1
            event          : req.path.slice(req.path.lastIndexOf('/') + 1)
            body           : req.body
            api_key        : api_key
            logger         : winston
            database       : opts.database
            compute_server : opts.compute_server
            ip_address     : req.ip
            cb      : (err, resp) ->
                if err
                    res.status(400).send(error:err)  # Bad Request
                else
                    res.send(resp)

    # Both POST based services below are given the name of the remember-me cookie.
    remember_me_cookie = auth.remember_me_cookie_name(opts.base_url)

    # HTTP-POST-based user queries
    user_query = require('./user-query')
    user_query.init(router, remember_me_cookie, opts.database)

    # HTTP-POST-based user API
    user_api = require('./user-api')
    user_api.init({
        router         : router
        cookie_name    : remember_me_cookie
        database       : opts.database
        compute_server : opts.compute_server
        logger         : winston
    })

    # stripe invoices:  /invoice/[invoice_id].pdf
    # If stripe is not configured, every invoice request is answered with a 404.
    stripe_connections = require('./stripe/connect').get_stripe()
    if not stripe_connections?
        router.get '/invoice/*', (req, res) ->
            res.status(404).send("stripe not configured")
    else
        router.get '/invoice/*', (req, res) ->
            winston.debug("/invoice/* (hub --> client): #{misc.to_json(req.query)}, #{req.path}")
            # last path segment, e.g. "something-<invoice_id>.pdf"
            name = req.path.slice(req.path.lastIndexOf('/') + 1)
            # drop everything up to and including the last '-'
            # (when there is no '-', lastIndexOf is -1 and slice(0) keeps the whole name)
            name = name.slice(name.lastIndexOf('-') + 1)
            dot = name.lastIndexOf('.')
            if dot == -1
                res.status(404).send("invoice must end in .pdf")
                return
            # the invoice id is whatever precedes the extension
            invoice_id = name.slice(0, dot)
            winston.debug("id='#{invoice_id}'")

            stripe_render_invoice(stripe_connections, invoice_id, true, res)

    # return uuid-indexed blobs (mainly used for graphics)
    # The blob is selected by the `uuid` query parameter; the last segment of the
    # path is only used as the file name, i.e., to derive the content type or the
    # name of the download (when the `download` query parameter is present).
    router.get '/blobs/*', (req, res) ->
        #winston.debug("blob (hub --> client): #{misc.to_json(req.query)}, #{req.path}")
        if not misc.is_valid_uuid_string(req.query.uuid)
            # The uuid is untrusted and echoed back: send it explicitly as plain text,
            # since res.send of a string defaults to text/html and the browser would
            # otherwise interpret markup contained in the query parameter.
            res.status(404).type('text/plain').send("invalid uuid=#{req.query.uuid}")
            return
        if not hub_register.database_is_working()
            res.status(404).send("can't get blob -- not connected to database")
            return
        opts.database.get_blob
            uuid : req.query.uuid
            cb   : (err, data) ->
                if err
                    res.status(500).send("internal error: #{err}")
                else if not data?
                    res.status(404).send("blob #{req.query.uuid} not found")
                else
                    filename = req.path.slice(req.path.lastIndexOf('/') + 1)
                    if req.query.download?
                        # tell browser to download the link as a file instead
                        # of displaying it in browser
                        res.attachment(filename)
                    else
                        res.type(filename)
                    res.send(data)

    # Set the cookie named by the `set` query parameter to the `value` query
    # parameter; without `set` the request is simply ended.
    # TODO: is this cookie trick dangerous in some surprising way?
    router.get '/cookies', (req, res) ->
        if not req.query.set
            res.end()
            return
        # TODO: implement expires as part of query?  not needed for now.
        # 6 months -- long is fine now since we support "sign out everywhere" ?
        six_months = 1000*24*3600*30*6
        jar = new Cookies(req, res, opts.cookie_options)
        jar.set(req.query.set, req.query.value, {maxAge: six_months})
        res.end()

    # Tells the client whether a token is needed to create an account:
    # responds with {token:true} if an account creation token is set, else with {}.
    router.get '/registration', (req, res) ->
        token_required = server_settings.all.account_creation_token
        res.json(if token_required then {token:true} else {})

    # The public part of the server settings, as JSON.
    router.get '/customize', (req, res) ->
        res.json(server_settings.pub)

    # Save other paths in # part of URL then redirect to the single page app.
    router.get ['/projects*', '/help*', '/settings*', '/admin*', '/dashboard*'], (req, res) ->
        url = require('url')
        # `search` is the raw query string including the leading "?"; it is null (or
        # empty, depending on the node version) when the URL has no query string, so
        # fall back to '' to avoid appending the text "null" to the redirect target.
        q = url.parse(req.url, true).search ? ''
        res.redirect(opts.base_url + "/app#" + req.path.slice(1) + q)

    # Return global status information about smc, as (never cached) JSON.
    router.get '/stats', (req, res) ->
        if not hub_register.database_is_working()
            res.json({error:"not connected to database"})
            return

        # Sends the result of the database lookup to the client.
        send_stats = (err, stats) ->
            res.header('Cache-Control', 'private, no-cache, no-store, must-revalidate')
            if err
                res.status(500).send("internal error: #{err}")
                return
            res.header("Content-Type", "application/json")
            res.send(JSON.stringify(stats, null, 1))

        opts.database.get_stats
            update : false   # never update in hub b/c too slow. instead, run $ hub --update_stats via a cronjob every minute
            ttl    : 30
            cb     : send_stats

    ###
    # Stripe webhooks -- not done
    router.post '/stripe', (req, res) ->
        form = new formidable.IncomingForm()
        form.parse req, (err, fields, files) ->
            # record and act on the webhook here -- see https://stripe.com/docs/webhooks
            # winston.debug("STRIPE: webhook -- #{err}, #{misc.to_json(fields)}")
        res.send('')
    ###

    # Mount the router on the application: below base_url if one is given,
    # otherwise at the root.
    if opts.base_url then app.use(opts.base_url, router) else app.use(router)

    # In dev mode, additionally initialize the http/websocket proxies and the
    # share server provided by the dev module.
    if opts.dev
        dev_server = require('./dev/hub-http-server')
        dev_server.init_http_proxy(app, opts.database, opts.base_url, opts.compute_server, winston)
        dev_server.init_websocket_proxy(http_server, opts.database, opts.base_url, opts.compute_server, winston)
        dev_server.init_share_server(app, opts.database, opts.base_url, winston)

    # Hand back the http server together with the router.
    return {http_server, express_router: router}

back to top