In Node.js, if you need to take a URL and extract its domain, protocol and port, you may have noticed that it is harder than it sounds.

We came across this problem when creating itsback.at, and this is what we came up with. It currently only recognises http(s) and not other protocols such as ftp. It returns the domain and port number as `domain:port`; when the URL does not specify a port, it falls back to 443 for https and to 80 for everything else.

'use strict'

const url = require('url')

/**
 * Normalise a raw URL string so that it always starts with a scheme.
 *
 * Surrounding whitespace is removed. Input that already begins with
 * `scheme://` is returned as is; anything else is prefixed with `http://`,
 * after dropping a dangling leading `://` or protocol-relative `//`.
 *
 * @param {string} dataUrl - URL text, with or without a leading scheme.
 * @returns {string} The URL text, guaranteed to start with `scheme://`.
 * @throws {TypeError} If `dataUrl` is not a string.
 */
function getUrl (dataUrl) {
  const trimmed = dataUrl.trim()

  // Only a scheme at the very start counts. A "://" further along (for
  // example inside a query string) belongs to a URL embedded in this one and
  // must not stop the default scheme from being added.
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) {
    return trimmed
  }

  // Strip a leading "://" or "//" so the result does not end up with a
  // doubled separator such as "http://://host".
  return `http://${trimmed.replace(/^:?\/\//, '')}`
}

/**
 * Build a `host:port` key for a URL.
 *
 * The input is first passed through getUrl, then parsed with url.parse.
 * An explicit port in the URL is used as is; otherwise the port is '443'
 * when the protocol contains 'https' and '80' in every other case.
 *
 * @param {string} rawUrl - URL text, with or without a scheme.
 * @returns {string} The key in the form `host:port`.
 */
const findUrlKey = (rawUrl) => {
  const parsed = url.parse(getUrl(rawUrl))

  // When url.parse reports no hostname, use the first path segment instead.
  const host = parsed.hostname || parsed.pathname.split('/')[0]

  if (parsed.port) {
    return `${host}:${parsed.port}`
  }

  const scheme = parsed.protocol || 'http'
  const fallbackPort = scheme.includes('https') ? '443' : '80'

  return `${host}:${fallbackPort}`
}

// findUrlKey is the module's only export.
module.exports = findUrlKey  

We wrote this in a test-driven manner, and the tests demonstrate quite clearly what it can and can't do. It isn't perfect and there are some odd edge cases, but it could easily be extended, for example to handle other protocols.

// Test dependencies, followed by the fixture table: each entry pairs an input
// URL with the `host:port` key that findUrlKey is expected to return for it.
const assert = require('assert')
    , findUrlKey = require('../lib/find-url-key')
    , fixtures =
      // http(s) scheme, with and without an explicit port or path
      [ { url: 'http://google.com', result: 'google.com:80' }
      , { url: 'https://google.com', result: 'google.com:443' }
      , { url: 'https://google.com:3000', result: 'google.com:3000' }
      , { url: 'http://google.com:3000', result: 'google.com:3000' }
      , { url: 'http://google.com:3000/path', result: 'google.com:3000' }
      , { url: 'https://google.com:3000/path', result: 'google.com:3000' }
      // no scheme at all
      , { url: 'google.com', result: 'google.com:80' }
      , { url: 'google.com/path', result: 'google.com:80' }
      , { url: 'google.com:3000/path', result: 'google.com:3000' }
      , { url: 'google.com:3000', result: 'google.com:3000' }
      // dangling "://" with no scheme name in front of it
      , { url: '://google.com:3000', result: 'google.com:3000' }
      , { url: '://google.com', result: 'google.com:80' }
      // non-http scheme: an explicit port is kept, otherwise the key uses 80
      , { url: 'ftp://google.com:3000', result: 'google.com:3000' }
      , { url: 'ftp://google.com', result: 'google.com:80' }
      ]

// One test case is generated per fixture. The title names both the input URL
// and the expected key so that a failing case can be identified from the
// report, even though many fixtures share the same expected result.
describe('Test URL parsing logic', () => {
  fixtures.forEach((fixture) => {
    const title = `should return ${JSON.stringify(fixture.result)} for ${JSON.stringify(fixture.url)}`

    // findUrlKey is synchronous, so no `done` callback is needed.
    it(title, () => {
      // Both values are plain strings, so a strict comparison is used.
      assert.strictEqual(findUrlKey(fixture.url), fixture.result, 'incorrect domain or port')
    })
  })
})

:wq