Skip to content

GpsPreProcess

轨迹预处理类

提供了轨迹预处理的相关方法(行程划分、提取带途经点的OD)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| gps_df | DataFrame | 定位数据 | None |
| use_multi_core | bool | 是否启用多核 | False |
| used_core_num | int | 使用的核数 | 2 |
Source code in src/gotrackit/gps/GpsTrip.py
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(self, gps_df: pd.DataFrame = None, use_multi_core: bool = False, used_core_num: int = 2):
    """Trajectory preprocessing class.

    Provides trajectory preprocessing methods (trip segmentation and
    extraction of ODs with waypoints).

    Args:
        gps_df: positioning (GPS) data
        use_multi_core: whether to enable multi-core processing
        used_core_num: number of cores to use
    """
    # Parallelism settings first, then the data they will be applied to.
    self.used_core_num = used_core_num
    self.use_multi_core = use_multi_core
    self.gps_df = gps_df

行程划分

对轨迹数据进行行程划分

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| time_format | str | 时间列格式模板 | '%Y-%m-%d %H:%M:%S' |
| time_unit | str | 时间列单位 | 's' |
| plain_crs | str | 平面投影坐标系 | 'EPSG:32650' |
| group_gap_threshold | float | 时间阈值,主行程划分参数,单位秒,如果前后GPS点的定位时间差超过该阈值,则在该点切分主行程 | 1800.0 |
| n | int | 子行程切分参数,如果连续超过n个GPS点的距离小于min_distance_threshold,且持续时间超过dwell_accu_time,那么该处被识别为停留点,从该处切分子行程 | 5 |
| min_distance_threshold | float | 子行程切分距离阈值,单位米,如果你只想划分主行程,则指定min_distance_threshold为负数即可 | 10.0 |
| dwell_accu_time | float | 子行程切分时间阈值,单位秒 | 60.0 |

Returns:

| Type | Description |
| --- | --- |
| DataFrame | 划分好行程的轨迹数据 |

Source code in src/gotrackit/gps/GpsTrip.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def trip_segmentations(self, time_format: str = '%Y-%m-%d %H:%M:%S',
                       time_unit: str = 's',
                       plain_crs: str = 'EPSG:32650', group_gap_threshold: float = 1800.0, n: int = 5,
                       min_distance_threshold: float = 10.0,
                       dwell_accu_time: float = 60.0) -> pd.DataFrame:
    """Trip segmentation.

    Splits the trajectory data held in ``self.gps_df`` into trips. When
    ``self.use_multi_core`` is set, the agents are distributed over the
    worker pool returned by ``self.create_pool()`` and the per-worker
    results are merged; otherwise the whole frame is processed in-process.

    Args:
        time_format: format template of the time column
        time_unit: unit of the time column
        plain_crs: planar projected coordinate reference system
        group_gap_threshold: time threshold in seconds for splitting main
            trips; when the gap between the timestamps of two consecutive
            GPS points exceeds it, the main trip is split at that point
        n: sub-trip splitting parameter; when more than n consecutive GPS
            points are closer than min_distance_threshold and the duration
            exceeds dwell_accu_time, the location is treated as a dwell
            point and the sub-trip is split there
        min_distance_threshold: sub-trip splitting distance threshold in
            meters; pass a negative value to split main trips only
        dwell_accu_time: sub-trip splitting time threshold in seconds

    Returns:
        The trajectory data with trips assigned, as a plain DataFrame
        without the geometry column.
    """
    if self.use_multi_core:
        pool, core_num, agent_group = self.create_pool()
        result_list = []
        try:
            for i in range(core_num):
                _gps_df = self.gps_df[self.gps_df[gps_field.AGENT_ID_FIELD].isin(agent_group[i])].copy()
                result_list.append(
                    pool.apply_async(self._trip_segmentation,
                                     args=(_gps_df, time_format, time_unit, plain_crs, group_gap_threshold, n,
                                           min_distance_threshold, dwell_accu_time)))
        finally:
            # Always release the pool, even if submitting a task failed.
            pool.close()
            pool.join()

        chunks = [res.get() for res in result_list]
        # Empty worker results carry no ids; skipping them keeps the running
        # maximum from turning into NaN and poisoning every later chunk.
        non_empty = [chunk for chunk in chunks if not chunk.empty]

        # Shift each chunk's ids by the running maximum so ids produced by
        # different workers do not collide after merging.
        max_agent_id = 0
        for chunk in non_empty:
            chunk[agent_field] = chunk[agent_field] + max_agent_id
            max_agent_id = max(max_agent_id, chunk[agent_field].max())

        if non_empty:
            # One concat over all chunks instead of growing a frame in a loop.
            trip_df = pd.concat(non_empty)
        elif chunks:
            # Every worker returned an empty frame: keep its column layout.
            trip_df = chunks[0]
        else:
            trip_df = pd.DataFrame()
        trip_df = trip_df.reset_index(drop=True)
    else:
        trip_df = self._trip_segmentation(gps_df=self.gps_df, time_format=time_format,
                                          time_unit=time_unit, plain_crs=plain_crs,
                                          group_gap_threshold=group_gap_threshold, n=n,
                                          min_distance_threshold=min_distance_threshold,
                                          dwell_accu_time=dwell_accu_time)

    trip_df = pd.DataFrame(trip_df)
    # The geometry column is optional; drop it only when present instead of
    # swallowing every possible exception.
    if geometry_field in trip_df.columns:
        del trip_df[geometry_field]
    return trip_df

提取带途经点的OD

从已经划分好行程的轨迹数据(且要求轨迹数据已经按照agent_id、time进行升序排列)中提取带途经点的OD

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| way_points_num | int | 途经点数目 | 5 |

Returns:

| Type | Description |
| --- | --- |
| tuple[DataFrame, GeoDataFrame] | od数据表(DataFrame), od数据表(GeoDataFrame) |

Source code in src/gotrackit/gps/GpsTrip.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def sampling_waypoints_od(self, way_points_num: int = 5) -> tuple[pd.DataFrame, gpd.GeoDataFrame]:
    """Extract ODs with waypoints.

    Extracts origin-destination records with waypoints from trajectory data
    that has already been split into trips (the data is required to be
    sorted ascending by agent_id and time). When ``self.use_multi_core`` is
    set, the agents are distributed over the worker pool returned by
    ``self.create_pool()`` and the per-worker results are merged.

    Args:
        way_points_num: number of waypoints

    Returns:
        The OD table (DataFrame) and the OD table (GeoDataFrame).
    """
    if not self.use_multi_core:
        return self._sampling_waypoints_od(gps_df=self.gps_df, way_points_num=way_points_num)

    pool, core_num, agent_group = self.create_pool()
    result_list = []
    try:
        for i in range(core_num):
            _gps_df = self.gps_df[self.gps_df[gps_field.AGENT_ID_FIELD].isin(agent_group[i])].copy()
            result_list.append(pool.apply_async(self._sampling_waypoints_od,
                                                args=(_gps_df, way_points_num)))
    finally:
        # Always release the pool, even if submitting a task failed.
        pool.close()
        pool.join()

    od_parts, line_parts = [], []
    for res in result_list:
        _od_df, _od_line = res.get()
        od_parts.append(_od_df)
        line_parts.append(_od_line)

    # Concatenate once rather than growing frames seeded from empty ones;
    # fall back to empty frames when no worker produced a result.
    od_df = pd.concat(od_parts, ignore_index=True) if od_parts else pd.DataFrame()
    od_line = pd.concat(line_parts, ignore_index=True) if line_parts else gpd.GeoDataFrame()
    return od_df, od_line