Expand source code
def part2(projection,data_path='./ProcessFiles/', save_path='./ResultFiles/'):
#Run by input paths from the terminal
# data_path = input('Please set part2 input folder ')
# data_path = data_path + '/'
# save_path = input('Please set part2 output folder')
# save_path = save_path + '/'
#Your absolute paths
# data_path = './source/'
# save_path = './target/'
os.makedirs(save_path, exist_ok=True)
all_csv = get_all_csv(data_path)
print('Get all csv files')
for csv_num in range(len(all_csv)):
csv_name = all_csv[csv_num]
# var(save_path + '\\' + csv_name.replace('.csv', ''))
create_folder(os.path.join(save_path, csv_name.replace('.csv', '')))
df = pd.read_csv(os.path.join(data_path, csv_name))
x = df["LATITUDE"]
y = df["LONGITUDE"]
date = df["OBSERVATION DATE"]
length = len(x)
# Data preprocessing: Convert original latitude and longitude data
initial_data = get_initial_data(x, y, date, length,projection)
initial_data_df = pd.DataFrame(initial_data[1:], columns=initial_data[0])
initial_data_df.to_csv(os.path.join(save_path, csv_name.replace('.csv', ''), 'initial_data.csv'), index=False)
print('File {}/{},Step 1 finished'.format(csv_num + 1, len(all_csv)))
#Data preprocessing: Interpolation for missing data by interpolation()
initial_data = interpolation(date, length, initial_data)
print('{},Step2 finished'.format(csv_name))
#Data preprocessing: Smooth the data by rolling_window()
Rolling_window_data_df = rolling_window(initial_data, save_path, csv_name)
print('{},Step3 finished'.format(csv_name))
#Data preprocessing: Outlier detection by get_sldf()
SLDF_df = get_sldf(Rolling_window_data_df, save_path, csv_name)
print('{}, Get SLDF_df'.format(csv_name))
#Path estimation: Get daily population centroids by mean_shift()
result = mean_shift(SLDF_df,save_path,csv_name)
print('{},Get Meanshift results'.format(csv_name))
savecsvs(os.path.join(save_path, csv_name.replace('.csv', ''), "centroids.csv"), result)
# data_write(os.path.join(save_path, csv_name.replace('.csv', ''), "shift.xlsx"), result)
#Path estimation: Group the daily population centroids according to the minimum distance principle by group()
group(os.path.join(save_path, csv_name.replace('.csv', ''), "centroids.csv"), save_path, csv_name)
print('File{}/{},Group finished'.format(csv_num + 1, len(all_csv)))
#Path estimation:Show the estimation results on the map
map_1(save_path,csv_name,'gam')
#map_1(save_path,csv_name,'randomforest')
#map_1(save_path,csv_name,'knn')
#Index calculation of speed and offset distance by speed(),offset_distance()
speed(os.path.join(save_path, csv_name.replace('.csv', '')))
off_distance(os.path.join(save_path, csv_name.replace('.csv', '')))
print('File {}/{},speed and offset distance finished'.format(csv_num + 1, len(all_csv)))
#Index calculation of average distance of daily centroids by avg_distance()
avg_distance(os.path.join(save_path, csv_name.replace('.csv', '')))
print('File {}/{},Avg_distance finished'.format(csv_num + 1, len(all_csv)))
print('Finish,{}/{}'.format(csv_num + 1, len(all_csv)))