芜湖不芜
# If the data you are working with is a pandas DataFrame, why not filter the
# rows and count them directly? Something like this may work:
#
#     trip_counts['AM'] = len(trip[trip.loc[:, 'hour'] < 12].index)
#     trip_counts['PM'] = len(trip[trip.loc[:, 'hour'] >= 12].index)
#
# Edit: I benchmarked the answers given here, because some people assume a
# list comprehension is automatically faster. In the resulting plot (not
# reproduced here) the plain for loop has more or less the best performance
# in this case, matched only by the Counter-plus-comprehension approach
# mentioned in one of the other answers. Note that the pandas implementation
# was modified slightly to match what the data structure probably looks like
# (i.e. not already in a DataFrame), so converting the data to a DataFrame on
# every run may add extra overhead.
#
# Benchmark: the code that generated the plot is shown below.

from collections import Counter
from types import SimpleNamespace

import numpy as np
import pandas as pd

# Every kernel below builds its AM/PM counts and then returns the constant 1
# instead of the counts. NOTE(review): presumably this keeps the kernels'
# outputs identical so perfplot's cross-kernel result comparison passes even
# though the kernels build different container types -- TODO confirm.


def gen_data(n):
    """Return the ten sample trips repeated ``n`` times.

    Each trip is a dict whose ``'start'`` value is a ``SimpleNamespace``
    exposing an integer ``hour`` attribute. The ten dict objects are shared
    between repetitions (list multiplication does not copy them).
    """
    hours = (9, 3, 14, 19, 7, 14, 19, 2, 20, 12)
    return [{'start': SimpleNamespace(hour=hour)} for hour in hours] * n


def use_vanilla_for(a):
    """Count AM (< 12) and PM trips in ``a`` with a plain for loop.

    Returns the constant 1; the counts are discarded on purpose.
    """
    trip_counts = {'AM': 0, 'PM': 0}
    for trip in a:
        if trip['start'].hour < 12:
            trip_counts['AM'] += 1
        else:
            trip_counts['PM'] += 1
    return 1


def use_list_comp(a):
    """Count AM/PM trips in ``a`` via a list comprehension plus ``list.count``.

    Returns the constant 1; the counts are discarded on purpose.
    """
    labels = ["AM" if trip["start"].hour < 12 else "PM" for trip in a]
    # NOTE(review): this iterates over every element of ``labels`` and calls
    # ``labels.count`` for each one, so the work grows quadratically with
    # len(a). It is kept as-is because it is the approach being measured.
    trip_counts = {label: labels.count(label) for label in labels}
    return 1


def use_counter(a):
    """Count AM/PM trips in ``a`` by feeding a generator to ``Counter``.

    Returns the constant 1; the counts are discarded on purpose.
    """
    trip_counts = Counter(
        "AM" if trip['start'].hour < 12 else "PM" for trip in a
    )
    return 1


def use_pandas(a):
    """Count AM/PM trips in ``a`` by boolean-filtering a one-column DataFrame.

    The DataFrame is built from ``a`` on every call, so the conversion cost
    is part of what gets timed. Returns the constant 1; the counts are
    discarded on purpose.
    """
    hours = [trip['start'].hour for trip in a]
    trip = pd.DataFrame(hours, columns=['hrs'])
    trip_counts = {'AM': 0, 'PM': 0}
    trip_counts['AM'] = len(trip[trip['hrs'] < 12].index)
    trip_counts['PM'] = len(trip[trip['hrs'] >= 12].index)
    return 1


def main():
    """Benchmark the four kernels with perfplot and display the plot."""
    # Third-party plotting dependency; imported here so the kernels above can
    # be imported and exercised without it being installed.
    import perfplot

    perfplot.show(
        setup=gen_data,
        kernels=[use_vanilla_for, use_list_comp, use_counter, use_pandas],
        labels=["vanilla_for", "list_comp", "counter", "dataframe"],
        n_range=[2 ** k for k in range(10)],
        # NOTE(review): the x values are n, whereas len(a) == 10 * n because
        # gen_data repeats a ten-element list; label kept as in the original.
        xlabel="len(a)",
    )


if __name__ == "__main__":
    main()