#!/usr/bin/ruby

#
# Copyright (c) 2010 Thomer M. Gil [http://thomer.com/]
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#

#
# cuerator generates a PDF cue sheet for bike rides from coordinates in a GPX
# or KML file.
#
# cuerator home page at http://thomer.com/cuerator/
#
# Version 0.1.0, 04/14/2010: initial release
# Version 0.1.1, 04/14/2010: added --miles option
# Version 0.1.2, 04/14/2010: footer in cue sheet
# Version 0.1.3, 04/18/2010: accumulated distance in cue sheet
# Version 0.1.4, 08/30/2010: oe=UTF8 bugfix suggested by Brian Feinberg
#


require 'getoptlong'

require 'rubygems'
require 'rexml/document'

begin
  require 'geokit' # for geocoding
rescue Exception => e
  warn "you do not have the 'geokit' gem installed."
  warn "please run 'gem install geokit' and try again"
  exit(-1)
end

begin
  require 'prawn'  # for pdf
rescue Exception => e
  warn "you do not have the 'prawn' gem installed"
  warn "please run 'gem install prawn' and try again"
  exit(-1)
end

include Geokit::Geocoders

# {{{ String
class String
  def strip_html
    self.gsub(/<\/?[^>]*>/, '')
  end

  # This code is copied from "The Ruby Way", Second Edition, by Hal Fulton, page 97.
  def levenshtein(other, ins=2, del=2, sub=1)
    # ins, del, sub are weighted costs
    return nil if self.nil? || self.empty?
    return nil if other.nil? || other.empty?
    dm = []        # distance matrix

    # Initialize first row values
    dm[0] = (0..self.length).collect { |i| i * ins }
    fill = [0] * (self.length - 1)

    # Initialize first column values
    for i in 1..other.length
      dm[i] = [i * del, fill.flatten]
    end

    # populate matrix
    for i in 1..other.length
      for j in 1..self.length
    # critical comparison
        dm[i][j] = [
             dm[i-1][j-1] +
               (self[j-1] == other[i-1] ? 0 : sub),
                 dm[i][j-1] + ins,
             dm[i-1][j] + del
       ].min
      end
    end

    # The last value in matrix is the
    # Levenshtein distance between the strings
    dm[other.length][self.length]
  end
end
# }}}
# {{{ GoogleDirections
class GoogleDirections
  attr_reader :waypoints
  attr_accessor :unit
  include Geokit::Mappable

  @@mode_map = {
    :biking => 'b',
    :walking => 'w',
    :driving => '',
    :no_highways => 'h',
    :no_tolls => 't',
    :public_transit => 'r',
  }

  def initialize(args = {})
    @unit = args[:unit] || :kilometers
    @waypoints = []
    self.mode = args[:mode]
  end

  def mode=(m)
    raise "bad mode" if !@@mode_map.has_key?(m)
    @mode = m
  end

  def <<(v)
    @waypoints << v
  end

  def directions
    saddr = "#{@waypoints[0].lat},#{@waypoints[0].lng}"
    daddr = "#{@waypoints[0].lat},#{@waypoints[0].lng}"

    # the via param is all points that are not the source (0) and the
    # destination; that is, it's all intermediary points.  the first
    # intermediary point is '1'.
    via_param = ''
    if !@waypoints.empty? && @waypoints.size > 2
      @waypoints[2..-1].each do |wp|
        daddr += "+to:#{wp.lat},#{wp.lng}"
        via_param = (1..(@waypoints.size-2)).to_a.join(',')
      end
    end

    # see http://mapki.com/wiki/Google_Map_Parameters
    url = "http://maps.google.com/maps?f=d&source=s_d&saddr=#{saddr}&daddr=#{daddr}&hl=en&oe=UTF8&mra=ls&dirflg=#{@@mode_map[@mode]}&via=#{via_param}"
    if @unit == :kilometers
      url += "&doflg=ptk"
    else
      url += "&doflg=ptm"
    end

    # parse output
    puts url if $options['--verbose']
    output = `curl -s '#{url}'`
    parts = output.split(/td class=num\>/)
    parts.shift # everything before first entry gets tossed

    @route = []
    prev_dist = 0.0
    parts.each do |part|
      part.match(/td\s+class\s*=\s*['"]?dirsegtext['"]?\s+[^>]+>(.+?)<\/td>.*?<div\s+id\s*=\s*['"]?sxdist['"]?\s+class\s*=\s*['"]?nw['"]?>([\d\.]+)\S+?(ft|mi|m|km)\s*<\/div>/m)
      line = "#$1"
      n, unit = $2.to_f, $3
      dist = "#{n} #{unit}"

      # total distance up to (but not including) this step
      $total_dist += prev_dist

      # compute added distance for _next_ step
      if unit == 'm'
        prev_dist = n/1000.0
      elsif unit == 'ft'
        prev_dist = n/5280.0
      else
        prev_dist = n
      end

      # if there is a second line (eg, "Continue on so-and-so road") then
      # break them up into two chunks; add the first chunk without the
      # distance to the route; then add continuation with the distance
      if part.match(/<div\s+class=["']?dirsegnote\s+note_\S+?["']?>(.*?)<\/div>/m)
        continuation = $1
        line.sub!(/\s*<div\s+class=['"]?dirsegnote.*/, '')
        @route << {:line => line.strip_html, :total_dist => $total_dist}
        @route << {:line => "#{continuation} (#{dist})".strip_html, :total_dist => $total_dist}
      else
        @route << {:line => "#{line} (#{dist})".strip_html, :total_dist => $total_dist}
      end
    end
    @route
  end
end
# }}}

# {{{ street_similar
# returns whether or not two full addresses are similar; this is to weed out
# perceived differences between "Massachusetts Ave W" and "Massachusetts Ave
# 2" or "Concord Ave, Cambridge" and "Concord Ave, Belmont" or "1-39 Concord
# Ave" and "40-79 Concord Ave"
#
def street_similar(a1, a2)
  # strip off the street numbers and everything after the first comma (eg,
  # town name, zip code, etc)
  a1x = a1.sub(/^\s*[0-9-]+\s*/, '').sub(/,.*/, '')
  a2x = a2.sub(/^\s*[0-9-]+\s*/, '').sub(/,.*/, '')

  ldist = a1x.levenshtein(a2x)
  return (!ldist.nil? && ldist < 5)
end
# }}}

GOOGLE_LIMIT = 25
DEFAULT_PDF = "directions.pdf"
FONT_SIZE = 10

# {{{ options
$options = {}
opts = GetoptLong.new(*[
  [ "--help", GetoptLong::NO_ARGUMENT ],
  [ "--miles", GetoptLong::NO_ARGUMENT ],
  [ "--pdf", GetoptLong::REQUIRED_ARGUMENT ],
  [ "--size", GetoptLong::REQUIRED_ARGUMENT ],
  [ "--verbose", GetoptLong::NO_ARGUMENT ],
])
opts.each { |opt, arg| $options[opt] = arg }
# }}}

if $options['--help']
  puts <<EOF

usage: cuerator [--verbose] [--pdf P] [--miles] [--size S] FILE

  --help       : this help
  --miles      : use feet and miles, not meters and kilometers
  --pdf P      : name of pdf output (default #{DEFAULT_PDF})
  --size S     : font size in the PDF (default #{FONT_SIZE})
  --verbose    : increase verbosity

  FILE         : a .gpx or .kml file

example:

  $ ./cuerator route.kml

EOF
  exit(0)
end

STDOUT.sync = true

if ARGV[0].nil?
  warn "you need to specify a a .gpx or .kml file"
  exit(-1)
end

if !File.exists?(ARGV[0])
  warn "#{ARGV[0]} not found"
  exit(-1)
end

# {{{ caching
# use a cache to store geocode answers
REVERSE_GEOCODE_CACHE = File.join(File.dirname(__FILE__), 'reverse_geocode.cache')
if File.exists?(REVERSE_GEOCODE_CACHE)
  $reverse_geocode_cache = Marshal.load(File.read(REVERSE_GEOCODE_CACHE))
else
  $reverse_geocode_cache = {}
end
at_exit do
  File.open(REVERSE_GEOCODE_CACHE, 'w+') { |f| f.write(Marshal.dump($reverse_geocode_cache)) }
end
# }}}


# parse the XML file and try to interpret it as GPX and KML file
doc = REXML::Document.new(File.new(ARGV[0]))
points = []

# gpx file
if !(gpx_points = REXML::XPath.match(doc, '//trkpt')).empty?
  points = gpx_points.collect {|point| Geokit::LatLng.new(point.attributes['lat'], point.attributes['lon'])}
# kml file
elsif !(kml_coordinates = REXML::XPath.match(doc, '//coordinates')).empty?
  points = kml_coordinates[0].text.split(/\n/)[1..-2].collect {|s| thunk = s.strip.split(/,/); Geokit::LatLng.new(thunk[1].to_f, thunk[0].to_f)}
else
  warn "did not recognize file format"
  exit(-1)
end

if points.empty?
  warn "no points found in #{ARGV[0]}"
  exit(-1)
end

gd = GoogleDirections.new(:mode => :biking, :unit => $options['--miles'] ? :miles : :kilometers)
prev_ll, prev_address, prev_suggested_bounds = nil, nil, nil
final_directions = []
$total_dist = 0.0
points.each_with_index do |ll,n|
  printf("\r%4d/%4d...", n+1, points.size)

  # skip this point if it isn't at least 100 meters away from the last
  # waypoint
  next if gd.waypoints.size >= 1 && gd.waypoints[-1].distance_to(ll, :units => :kilometers) < 0.1

  # skip this point if it is within the suggested bounds of the last point we
  # added to the route
  next if prev_suggested_bounds && prev_suggested_bounds.contains?(ll)

  # reverse geo-code and skip if the address is too similar to the previous
  # one
  if $reverse_geocode_cache.has_key?(ll)
    revgeo = $reverse_geocode_cache[ll]
  else
    revgeo = $reverse_geocode_cache[ll] = GoogleGeocoder.reverse_geocode(ll)
  end
  next if prev_address && street_similar(prev_address, revgeo.full_address)

  # add this to the list of waypoints
  gd << ll

  prev_ll = ll
  prev_address = revgeo.full_address
  prev_suggested_bounds = revgeo.suggested_bounds

  # pass points to google for directions
  if gd.waypoints.size >= GOOGLE_LIMIT
    final_directions.concat(gd.directions)
    gd = GoogleDirections.new(:mode => :biking, :unit => $options['--miles'] ? :miles : :kilometers)
    gd << ll
  end
end

# remainder points
if gd.waypoints.size > 1
  final_directions.concat(gd.directions)
end

# generate PDF
pdf_out = $options['--pdf'] || DEFAULT_PDF
font_size = ($options['--size'] || FONT_SIZE).to_i
Prawn::Document.generate(pdf_out) do |pdf|
  pdf.font_size = font_size
  final_directions.each_with_index do |h,i|
    line = sprintf("%d. [%.2f] %s", i+1, h[:total_dist], h[:line])
    pdf.text(line)
  end
  pdf.text("\nGenerated by cuerator, written by Thomer M. Gil [http://thomer.com/cuerator/].")
end
puts "\rgenerated #{pdf_out}. done."
