DROP FUNCTION address_copy(BIGINT);
CREATE OR REPLACE FUNCTION address_copy(dataid BIGINT) RETURNS BIGINT
AS $$
# -*- coding: utf8 -*-
  data_query = plpy.prepare("SELECT datatypeid, value, objectid, source, accountid, language, time_anchor, time_end, time_start FROM data WHERE dataid = $1", ["bigint"])
  data = plpy.execute(data_query, [dataid])
  datatypeid = data[0]["datatypeid"]
  address = data[0]["value"]
  objectid = data[0]["objectid"]
  source = data[0]["source"]
  accountid = data[0]["accountid"]
  time_end = data[0]["time_end"]
  time_anchor = data[0]["time_anchor"]
  time_start = data[0]["time_start"]
  lng = data[0]["language"]

  if address:
    address = address.strip()
  else: # address = NULL, deleting
    street_remove = plpy.prepare("DELETE FROM street_connection WHERE dataid = $1", ["bigint"])
    plpy.execute(street_remove, [dataid])    

  # matching addresses.. and saving the street connection
  if datatypeid == 'address_main':
    address_type = 'address_type.1'
  elif datatypeid == 'address_side':
    address_type = 'address_type.2'
  elif datatypeid == 'address_virtual':
    address_type = 'address_type.3'
  elif datatypeid == 'address':
    address_type = 'address_type.1'
  else:
    return 0

  # reading address format
  regions_query = plpy.prepare("SELECT regionid, regiongroupid FROM object_region INNER JOIN region ON (region.id = object_region.regionid) WHERE objectid = $1 AND NOT region.deleted AND regiongroupid < 13 ORDER BY regiongroupid ASC NULLS LAST", ["int"])
  regions = plpy.execute(regions_query, [objectid])
  countryid = cityid = districtid = 0
  for row in regions:
    if row["regiongroupid"] == 3:
      countryid = row["regionid"]
    elif row["regiongroupid"] == 6:
      cityid = row["regionid"]
    elif row["regiongroupid"] == 7:
      districtid = row["regionid"]

  format_query = plpy.prepare("SELECT value, objectid FROM data WHERE objectid = $1 AND visible AND datatypeid = 'address_format'", ["int"])
  formats = plpy.execute(format_query, [countryid])
  if not formats:
    return 0
  format = formats[0]["value"]

  import re

  if format == "address_format.1":
    regexp = "^(([[:alnum:]' ]*[[:space:].])?([[:alpha:]]{2,}[' -]?)+([[:alpha:]]{2,}\Z)?)"
    street_q = plpy.prepare("SELECT substring($1, $2) AS subs", ["text", "text"])
    streets = plpy.execute(street_q, [address, regexp])
    if not streets:
      return 0
    street = streets[0]["subs"]
    if not street:
      plpy.notice("empty street match on address_format.1 and "+address)
      return 0
    range = address.replace(street,"",1)
    street = street.strip()
    #preg = re.compile("(?P<street>[^\W\d_]+(([ \t][^\W\d_]+)+)?('[^\W\d_]+)?)([ \t]+(?P<start>[0-9]+)(?P<startext>[a-zA-Z]+)?\s*-?\s*(?P<end>[0-9]+)?(?P<endext>[a-zA-Z]+)?)?", re.UNICODE)
  #elif format == "address_format.2":
    #preg = re.compile("(?P<street>[^\W\d_]+(([ \t][^\W\d_]+)+)?('[^\W\d_]+)?)([ \t]*,[ \t]*(?P<start>[0-9]+)(?P<startext>[a-zA-Z]+)?\s*-?\s*(?P<end>[0-9]+)?(?P<endext>[a-zA-Z]+)?)?", re.UNICODE)
  elif format == "address_format.3":
    regexp = "((((East|South|West|North|N|NE|E|SE|S|SW|W|NW)\s*([[:digit:]]{1,3}(st|nd|rd|th)?)\s*)|([[:alpha:]]{3,}[' -])*)([[:alpha:]]{2,})\Z)"
    street_q = plpy.prepare("SELECT substring($1, $2) AS subs", ["text", "text"])
    streets = plpy.execute(street_q, [address, regexp])
    if not streets:
      return 0
    street = streets[0]["subs"]
    if not street:
      plpy.notice("empty street match on address_format.3 and "+address)
      return 0
    range = address.replace(street,"",1)
    street = street.strip()
    #preg = re.compile("((?P<start>[0-9]+)(?P<startext>[a-zA-Z]+)?\s*-?\s*(?P<end>[0-9]+)?(?P<endext>[a-zA-Z]+)?)?[ \t]*(?P<street>[^\W\d_]+(([ \t][^\W\d_]+)+)?('[^\W\d_]+)?)", re.UNICODE)
  #elif format == "address_format.4":
    #preg = re.compile("((?P<start>[0-9]+)(?P<startext>[a-zA-Z]+)?\s*-?\s*(?P<end>[0-9]+)?(?P<endext>[a-zA-Z]+)?,[ \t]*)?(?P<street>[^\W\d_]+(([ \t][^\W\d_]+)+)?('[^\W\d_]+)?)", re.UNICODE)
  #elif format == "address_format.5":
    #preg = re.compile("(\w+)(\s+[0-9-]*)?", re.UNICODE)
  #elif format == "address_format.6":
    #preg = re.compile("(\w+)", re.UNICODE)
  else:
    plpy.notice("with "+format+" no matching address")
    return 0
    # address format unknown, using default
    #preg = re.compile("(?P<street>[^\W\d_]+(([ \t][^\W\d_]+)+)?('[^\W\d_]+)?)([ \t]+(?P<start>[0-9]+)(?P<startext>[a-zA-Z]+)?\s*-?\s*(?P<end>[0-9]+)?(?P<endext>[a-zA-Z]+)?)?", re.UNICODE)

  #res = preg.match(address).groupdict()
  # inserting or updating existing street connection

  range = range.strip()
  if len(range) > 0:
    range_preg = re.compile("(?P<start>[0-9]+[a-zA-Z]*)?\s*-?\s*(?P<end>[0-9]+[a-zA-Z]*)?")
    exts = range_preg.match(range).groupdict()
  else:
    exts = None

  #plpy.notice("house numbers: "+ str(exts))
  # finding street
  if cityid > 0 and districtid > 0:
    street_q = plpy.prepare("SELECT id FROM street WHERE NOT deleted AND parent_id IN ($1, $2) AND umlaut_flatten($3) = umlaut_flatten(name)", ["int", "int", "text"])
    streets = plpy.execute(street_q, [cityid, districtid, street])
  elif cityid > 0:
    street_q = plpy.prepare("SELECT id FROM street WHERE NOT deleted AND parent_id IN ($1) AND umlaut_flatten($2) = umlaut_flatten(name)", ["int", "text"])
    streets = plpy.execute(street_q, [cityid, street])
  else:
    # no city defined for the address, cannot find street
    plpy.notice("no city defined for address, cannot find street")
    return 0

  # no street found with this name
  if not streets:
    plpy.notice("cannot find street "+street+" in given city "+str(cityid))
    return 0

  streetid = streets[0]["id"]

  # removing old entry from this street and this object
  street_remove = plpy.prepare("DELETE FROM street_connection WHERE targetid = $1 AND streetid = $2 RETURNING dataid", ["int", "int"])
  dataids = plpy.execute(street_remove, [objectid, streetid])
  if dataids:
    datid = dataids[0]['dataid']
  else:
    datid = None

  if exts:
    if not exts.has_key('start') or exts['start'] == None:
      exts['start'] = ''
      exts['end'] = ''
    elif not exts.has_key('end') or exts['end'] == None:
      exts['end'] = exts['start']
  else:
    exts = {'start':'', 'end':''}

  # inserting new street connection
  sconn = ":".join([str(streetid), address_type, exts["start"], exts["end"], ""])
  street_add = plpy.prepare("SELECT save_data('street_connection', $1, $2, $3, $4, $5, $6, $7, $8, $9, NULL, $10, $11) AS did", ["text", "int", "text", "int", "text", "int", "int", "int", "bigint", "boolean", "bigint"])
  #street_add = plpy.prepare("INSERT INTO street_connection (targetid, streetid, address_start, address_end, address_type) VALUES ($1, $2, $3, $4, $5)", ["int", "int", "text", "text", "text"])
  #plpy.notice("sconn:"+str(sconn)+" - objectid:"+str(objectid)+" - source:"+str(source)+" - accid:"+str(accountid)+" - lng:"+str(lng)+" - time_start:"+str(time_start)+" - time_anchor:"+str(time_anchor)+" - time_end:"+str(time_end)+" - datid:"+str(datid)+" - dataid:"+str(dataid))
  rval = plpy.execute(street_add, [sconn, objectid, source, accountid, lng, time_start, time_anchor, time_end, datid, True, dataid])

  return rval[0]["did"]

$$ LANGUAGE plpythonu;

