import networkx as nx
import pandas as pd
import numpy as np
import geopandas as gpd
import functools
import community
import array
import numbers
import warnings
from shapely.ops import polygonize_full, polygonize, unary_union
from shapely.geometry import Point, LineString, Polygon, MultiPolygon, mapping, MultiLineString
from shapely.ops import cascaded_union, linemerge, nearest_points
pd.set_option("display.precision", 3)
from .graph import graph_fromGDF, dual_id_dict
from .utilities import dict_to_df, min_distance_geometry_gdf
def identify_regions(dual_graph, edges_gdf, weight = None):
    """
    Detect regions (communities) in the street network through its dual graph representation
    and attach the resulting region codes to the street segments.
    Communities are extracted with the modularity optimisation routine ``community.best_partition``.

    Parameters
    ----------
    dual_graph: Networkx.Graph
        The dual graph of an urban area.
    edges_gdf: LineString GeoDataFrame
        The (primal) street segments GeoDataFrame. It is joined on its 'edgeID' column.
    weight: string
        The edges' attribute to use when extracting the communities. If None is passed, the key 'topo' is
        used instead, with the intent that only the topological relations influence the resulting communities.

    Returns
    -------
    regions: LineString GeoDataFrame
        A copy of edges_gdf left-joined with a column named 'p_' + weight that holds the region code of each segment.
    """
    # best_partition needs a string key, so None is replaced by a placeholder name
    weight_key = 'topo' if weight is None else weight

    # extraction of the best partition on the dual graph
    communities = community.best_partition(dual_graph, weight=weight_key)
    # presumably re-keys the partition by the primal 'edgeID' - confirm against graph.dual_id_dict
    membership = dual_id_dict(communities, dual_graph, 'edgeID')

    # one dictionary -> one column, then join it onto the street segments
    partitions_df = dict_to_df([membership], ['p_' + weight_key])
    return pd.merge(edges_gdf.copy(), partitions_df, left_on='edgeID', right_index=True, how='left')
def identify_regions_primal(graph, nodes_gdf, weight = None):
    """
    It identifies regions in the street network, using the primal graph representation.
    The modularity optimisation technique (``community.best_partition``) is used to identify urban regions.

    Parameters
    ----------
    graph: Networkx.Graph
        The primal graph of an urban area.
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame. Its 'nodeID' column is mapped onto the partition.
    weight: string
        The edges' attribute to use when extracting the communities. If None is passed, the key 'topo' is
        used instead, with the intent that only the topological relations influence the resulting communities.

    Returns
    -------
    regions: Point GeoDataFrame
        A copy of nodes_gdf with an additional column, named 'p_' + weight, holding the region code of each node.
    """
    if weight is None:
        weight = 'topo' # the function requires a string

    # extraction of the best partition: a mapping node -> community code
    partition = community.best_partition(graph, weight=weight)

    regions = nodes_gdf.copy()
    # nodes whose nodeID is not a key of the partition end up with NaN
    regions['p_'+weight] = regions.nodeID.map(partition)
    return regions
def polygonise_partitions(edges_gdf, column, convex_hull = True, buffer = 30):
    """
    Build one polygon per district out of the street segments assigned to it.
    The segments of each district are polygonised, merged and buffered; optionally the convex hull of the result is taken.

    Parameters
    ----------
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: string
        The name of the column containing the district identifier.
    convex_hull: boolean
        When True, the convex hull of each buffered, polygonised cluster of segments is returned.
    buffer: float
        Buffer distance applied around the polygonised segments, before possibly obtaining the convex hulls.

    Returns
    -------
    polygonised_partitions: Polygon GeoDataFrame
        A GeoDataFrame with a 'geometry' column and the district identifier column, one row per district,
        in the same CRS as edges_gdf.
    """
    shapes = []
    district_ids = []

    for district_id in edges_gdf[column].unique():
        members = edges_gdf[edges_gdf[column] == district_id]
        # polygonize_full returns several collections (polygons plus leftover line work);
        # all of them are merged together before buffering
        pieces = polygonize_full(members.geometry.unary_union)
        outline = unary_union(pieces).buffer(buffer)
        shapes.append(outline.convex_hull if convex_hull else outline)
        district_ids.append(district_id)

    df = pd.DataFrame({'geometry' : shapes, column : district_ids})
    return gpd.GeoDataFrame(df, crs=edges_gdf.crs, geometry=df['geometry'])
def district_to_nodes_from_edges(nodes_gdf, edges_gdf, column):
    """
    Transfer the districts' identifiers from the street segments (edges) to the street junctions (nodes),
    for the case in which communities were identified on the dual graph.
    Each node receives the district of the street segment found closest to it by _assign_district_to_node.

    Parameters
    ----------
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: string
        The name of the column containing the district identifier.

    Returns
    -------
    nodes_gdf: Point GeoDataFrame
        A copy of the street junctions GeoDataFrame with the district column filled in (cast to int).
    """
    # the spatial index is built once and shared by all the per-node lookups
    spatial_index = edges_gdf.sindex

    def _district_of(row):
        return _assign_district_to_node(row['geometry'], edges_gdf, spatial_index, column)

    result = nodes_gdf.copy()
    result[column] = 0
    result[column] = result.apply(_district_of, axis = 1)
    result[column] = result[column].astype(int)
    return result
def _assign_district_to_node(node_geometry, edges_gdf, sindex, column, search_radius = 20):
    """
    Supporting function for district_to_nodes_from_edges.
    It returns the district of the street segment closest to the given node. Candidates are first narrowed down,
    through the spatial index, to the segments whose bounding box intersects the bounding box of a buffer around the node.

    Parameters
    ----------
    node_geometry: Point
        A node's geometry.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    sindex: Spatial Index
        Spatial Index object of the edges_gdf.
    column: string
        The name of the column containing the district identifier.
    search_radius: float
        Radius of the buffer used to pre-select candidate segments (default 20, in the units of the CRS).

    Returns
    -------
    district: int
        The district identifier.
    """
    search_area = node_geometry.buffer(search_radius)
    # positions (not labels) of the segments whose bounding box intersects the search area's bounds
    candidate_positions = list(sindex.intersection(search_area.bounds))

    if candidate_positions:
        candidates = edges_gdf.iloc[candidate_positions].copy()
    else:
        # no segment near the node: search the whole network rather than handing an empty
        # GeoDataFrame to the nearest-geometry helper (presumably it cannot pick a minimum
        # out of nothing - confirm against utilities.min_distance_geometry_gdf)
        candidates = edges_gdf.copy()

    # the second element returned by the helper is used as the index label of the closest segment
    closest_ix = min_distance_geometry_gdf(node_geometry, candidates)[1]
    district = edges_gdf.loc[closest_ix][column]
    return district
def districts_to_edges_from_nodes(nodes_gdf, edges_gdf, column):
    """
    It assigns districts' identifiers to the street segments (edges), when the districts are assigned to the junctions (nodes),
    i.e. communities are identified on the primal graph. The attribution is based on the districts of the two end nodes of each edge.
    Three values are assigned to each edge:
    - district_u: An integer representing the district identifier for the starting node of the edge.
    - district_v: An integer representing the district identifier for the ending node of the edge.
    - district_uv: An integer representing the district identifier for the edge, when district_u == district_v; 999999 otherwise.

    Parameters
    ----------
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame. It is looked up by index label with the edges' 'u' and 'v' values.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame. It is looked up by index label with the values of its 'edgeID' column.
    column: string
        The name of the column containing the district identifier.

    Returns
    -------
    edges_gdf: LineString GeoDataFrame
        A copy of the street segments GeoDataFrame with the columns column+'_uv', column+'_u' and column+'_v'.
    """
    edges_gdf = edges_gdf.copy()

    # the target columns are created up-front (999999 = no district) and then overwritten in one go
    new_columns = [column+'_uv', column+'_u', column+'_v']
    for name in new_columns:
        edges_gdf[name] = 999999

    # each row yields a (district_uv, district_u, district_v) tuple, expanded into three columns
    edges_gdf[new_columns] = edges_gdf.apply(lambda row: _assign_district_to_edge(row['edgeID'], nodes_gdf, edges_gdf,
                                                                                  column), axis = 1, result_type= 'expand')
    return edges_gdf
def _assign_district_to_edge(edgeID, nodes_gdf, edges_gdf, column):
    """
    Supporting function for districts_to_edges_from_nodes.
    It reads the districts of the two end nodes of an edge and derives the district of the edge itself.

    Parameters
    ----------
    edgeID: int
        The edgeID, used as an index label of edges_gdf.
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame, looked up by index label with the edge's 'u' and 'v' values.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: string
        The name of the column containing the district identifier.

    Returns
    -------
    tuple
        (district_uv, district_u, district_v), where district_uv is the shared district of the two end nodes,
        or 999999 when they belong to different districts.
    """
    edge = edges_gdf.loc[edgeID]
    from_district, to_district = (nodes_gdf.loc[node][column] for node in (edge.u, edge.v))
    # the edge inherits a district only when both of its ends agree on it
    shared_district = from_district if from_district == to_district else 999999
    return shared_district, from_district, to_district
def district_to_nodes_from_polygons(nodes_gdf, partitions_gdf, column):
    """
    Transfer the districts' identifiers to the street junctions (nodes) from polygons representing district areas.
    Each node receives the district of the polygon found closest to it by _assign_district_to_node_from_polygons.

    Parameters
    ----------
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame.
    partitions_gdf: Polygon GeoDataFrame
        The GeoDataFrame of the polygons representing the districts.
    column: string
        The name of the column containing the district identifier.

    Returns
    -------
    nodes_gdf: Point GeoDataFrame
        A copy of the street junctions GeoDataFrame with the district column filled in (cast to int).
    """
    def _district_of(row):
        return _assign_district_to_node_from_polygons(row['geometry'], partitions_gdf, column)

    result = nodes_gdf.copy()
    result[column] = result.apply(_district_of, axis = 1)
    result[column] = result[column].astype(int)
    return result
def _assign_district_to_node_from_polygons(node_geometry, partitions_gdf, column):
    """
    Supporting function for district_to_nodes_from_polygons.
    It returns the district identifier of the polygon selected by min_distance_geometry_gdf for the given node.

    Parameters
    ----------
    node_geometry: Point
        A node's geometry.
    partitions_gdf: Polygon GeoDataFrame
        The GeoDataFrame of the polygons representing the districts.
    column: string
        The name of the column containing the district identifier.

    Returns
    -------
    district: int
        The district identifier.
    """
    # the second element returned by the helper is used as the index label of the closest polygon
    nearest = min_distance_geometry_gdf(node_geometry, partitions_gdf)
    closest_ix = nearest[1]
    return partitions_gdf.loc[closest_ix][column]
def amend_nodes_membership(nodes_gdf, edges_gdf, column, min_size_district = 10):
    """
    Amend the membership of nodes to districts based on connectivity and minimum district size.
    Nodes belonging to districts that are too small, or that are disconnected from the main body of their district,
    are flagged as invalid (999999) and then iteratively re-assigned on the basis of their neighbours' districts.

    Parameters
    ----------
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: str
        The name of the column containing the district identifier.
    min_size_district: int
        The minimum size (number of nodes) required for a district to be considered valid. Default is 10.

    Returns
    -------
    nodes_gdf: GeoDataFrame
        The updated nodes GeoDataFrame with amended district memberships. Nodes that cannot be attached to any
        valid district keep the value 999999 (a warning is issued in that case).
    """
    nodes_gdf = nodes_gdf.copy()
    nodes_gdf = _check_disconnected_districts(nodes_gdf, edges_gdf, column, min_size_district)

    # if there are invalid districts, amend
    while (nodes_gdf[column] == 999999).any():
        previous = nodes_gdf[column].copy()
        # every node is amended against the memberships as they were at the start of this iteration
        nodes_gdf[column] = nodes_gdf.apply(lambda row: _amend_node_membership(row['nodeID'], nodes_gdf, edges_gdf, column), axis = 1)
        nodes_gdf = _check_disconnected_districts(nodes_gdf, edges_gdf, column, min_size_district)

        # an iteration that changes nothing would repeat identically forever (e.g. invalid nodes with
        # no path to any valid district, or no valid district at all): stop instead of looping
        if nodes_gdf[column].equals(previous):
            unresolved = int((nodes_gdf[column] == 999999).sum())
            warnings.warn(str(unresolved) + " node(s) could not be assigned to a valid district and keep the value 999999")
            break

    return nodes_gdf
def _amend_node_membership(nodeID, nodes_gdf, edges_gdf, column):
    """
    Work out the district a node should belong to, on the basis of the districts of its neighbouring nodes.
    Nodes that already have a valid district (anything but 999999) keep it.

    Parameters
    ----------
    nodeID: int
        The ID of the node to amend the membership for; it is also used as an index label of nodes_gdf.
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: str
        The name of the column containing the district membership.

    Returns
    -------
    new_district: int
        The amended district membership for the specified node; 999999 when none of its neighbours has a valid district.
    """
    # a node with a valid membership is left untouched
    current_district = nodes_gdf.loc[nodeID][column]
    if current_district != 999999:
        return current_district

    # neighbouring nodes: all the end nodes of the incident edges, except the node itself
    touches_node = (edges_gdf.u == nodeID) | (edges_gdf.v == nodeID)
    incident_edges = edges_gdf[touches_node]
    neighbours = list(np.unique(incident_edges[['u', 'v']].values))
    neighbours.remove(nodeID)

    # only the neighbours that currently have a valid district can vote
    is_valid_neighbour = nodes_gdf.nodeID.isin(neighbours) & (nodes_gdf[column] != 999999)
    voters = nodes_gdf[is_valid_neighbour]
    if len(voters) == 0:
        return 999999

    # districts of the voters, most frequent first
    votes = voters[column].value_counts()
    has_clear_winner = len(votes) == 1 or votes.iloc[0] > votes.iloc[1]
    if has_clear_winner:
        return votes.idxmax()

    # tie: keep only the voters of the first two districts in the ranking and
    # pick the district of the one selected by min_distance_geometry_gdf
    finalists = voters[voters[column].isin(list(votes.iloc[0:2].index))]
    closest_ix = min_distance_geometry_gdf(nodes_gdf.loc[nodeID].geometry, finalists)[1]
    return finalists.loc[closest_ix][column]
def _check_disconnected_districts(nodes_gdf, edges_gdf, column, min_size = 10):
    """
    Check for too small or disconnected districts in the nodes GeoDataFrame and update the membership of the
    affected nodes to 999999.
    A district smaller than min_size is invalidated entirely; in a district whose nodes do not form a connected
    graph, only the nodes outside its largest connected component are invalidated.

    Parameters
    ----------
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: str
        The name of the column containing the district identifier.
    min_size: int
        The minimum size of a district for it to be considered valid. Defaults to 10.

    Returns
    -------
    nodes_gdf: Point GeoDataFrame
        A copy of the nodes GeoDataFrame where invalid memberships are set to 999999.
    """
    nodes_gdf = nodes_gdf.copy()

    for district in nodes_gdf[column].unique():
        if district == 999999:
            continue
        tmp_nodes = nodes_gdf[nodes_gdf[column] == district].copy()

        # if the district is too small, make it not valid
        if len(tmp_nodes) < min_size:
            nodes_gdf.loc[nodes_gdf.nodeID.isin(tmp_nodes.nodeID), column] = 999999
            continue

        # edges with both ends inside the district (only needed once the size check is passed)
        tmp_edges = edges_gdf[edges_gdf.u.isin(tmp_nodes.nodeID) & edges_gdf.v.isin(tmp_nodes.nodeID)].copy()
        # create a graph with only nodes and edges belonging to the district
        tmp_graph = graph_fromGDF(tmp_nodes, tmp_edges, 'nodeID')
        if nx.is_connected(tmp_graph):
            continue

        # make not valid all the nodes lying outside the largest connected component
        largest_component = max(nx.connected_components(tmp_graph), key=len)
        stray_nodes = tmp_nodes.nodeID[~tmp_nodes.nodeID.isin(list(largest_component))]
        nodes_gdf.loc[nodes_gdf.nodeID.isin(stray_nodes), column] = 999999

    return nodes_gdf
def find_gateways(nodes_gdf, edges_gdf, column):
    """
    Flag the junctions lying on the boundary of a district, i.e. the ones connected to other districts
    through "bridge" edges.

    Parameters
    ----------
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: string
        The name of the column containing the district membership.

    Returns
    -------
    nodes_gdf: Point GeoDataFrame
        A copy of the nodes GeoDataFrame with a 'gateway' column (1 = gateway, 0 = not a gateway).
    """
    def _is_gateway(row):
        return _gateway(row['nodeID'], result, edges_gdf, column)

    result = nodes_gdf.copy()
    # assign gateways
    result['gateway'] = result.apply(_is_gateway, axis = 1)
    return result
def _gateway(nodeID, nodes_gdf, edges_gdf, column):
    """
    Supporting function for find_gateways.
    A node is a gateway when the node itself and the nodes it is directly linked to do not all share the same district.

    Parameters
    ----------
    nodeID: int
        nodeID of the node.
    nodes_gdf: Point GeoDataFrame
        The nodes (junctions) GeoDataFrame.
    edges_gdf: LineString GeoDataFrame
        The street segments GeoDataFrame.
    column: str
        The name of the column containing the district membership.

    Returns
    -------
    int
        1 = gateway, 0 = not a gateway.
    """
    # edges having the given node as one of their ends
    touches_node = (edges_gdf.u == nodeID) | (edges_gdf.v == nodeID)
    incident_edges = edges_gdf[touches_node]

    # end nodes of those edges: the given node plus the ones linked to it
    is_endpoint = nodes_gdf.nodeID.isin(incident_edges.u) | nodes_gdf.nodeID.isin(incident_edges.v)
    memberships = nodes_gdf.loc[is_endpoint, column]

    # more than one district among them means that the node sits on a district boundary
    return 1 if len(memberships.unique()) > 1 else 0