狐的传说
# Average one CSV column over the rows whose filter column satisfies a
# threshold comparison.
#
# Update: the comparison named by `logic` is resolved to a `compare` function
# once, before the rows are iterated, instead of re-testing the `logic` string
# for every row.  This is faster when the data has many rows.
#
# Initial answer (superseded): each row was tested with an if/elif chain and
# the outcome stored in a boolean `include`; two accumulators, `running_sum`
# and `running_count`, were then divided to obtain the average.  That revision
# divided by zero when no row passed the filter.
import csv
import io
import operator

# Maps every accepted `logic` name to its comparison function.  'le' / 'ge'
# are accepted as aliases of 'lte' / 'gte'.
_COMPARATORS = {
    "lt": operator.lt,
    "gt": operator.gt,
    "lte": operator.le,
    "le": operator.le,
    "gte": operator.ge,
    "ge": operator.ge,
}


# Written as a function because the definition of load_data was not shared,
# but the main idea can be translated to a class.
def calc_avg(self, specific, filter, logic, threshold):
    """Return the mean of column *specific* over the rows that pass a filter.

    A row is included when ``row[filter] <op> threshold`` holds, where
    ``<op>`` is the comparison selected by *logic*.

    Args:
        self: CSV text with a header row (comma-delimited).
        specific: Name of the column to average.
        filter: Name of the column compared against *threshold*.  (The name
            shadows the builtin but is kept for backward compatibility.)
        logic: One of ``'lt'``, ``'gt'``, ``'lte'``/``'le'``,
            ``'gte'``/``'ge'``.
        threshold: Number, or a string that ``float()`` can parse.

    Returns:
        The average as a float, or ``0`` when no row passes the filter.

    Raises:
        ValueError: If *logic* is not a recognised name, or a cell in either
            column (or a string *threshold*) is not numeric.
        KeyError: If *specific* or *filter* is not a column of the CSV.
    """
    if isinstance(threshold, str):
        threshold = float(threshold)

    # Resolve the comparison once, and fail loudly on an unknown name rather
    # than hitting an unbound variable inside the loop.
    try:
        compare = _COMPARATORS[logic]
    except KeyError:
        valid = ", ".join(sorted(_COMPARATORS))
        raise ValueError(
            f"unknown logic {logic!r}; expected one of: {valid}"
        ) from None

    running_sum = 0
    running_count = 0
    # Replace io.StringIO(self) with open(path, newline="") to read a real file.
    with io.StringIO(self) as avg_file:
        for row in csv.DictReader(avg_file, delimiter=","):
            # float() accepts both integer and decimal cells.
            if compare(float(row[filter]), threshold):
                running_sum += float(row[specific])
                running_count += 1

    if running_count == 0:
        # Not even one row passed the filter.
        return 0
    return running_sum / running_count


data = """RecordID,SAPS-I,SOFA,Length_of_stay
132539,6,1,5
132540,16,8,8
132541,21,11,19
132545,17,2,4
132547,14,11,6
132548,14,4,9
132551,19,8,6
132554,11,0,17"""


if __name__ == "__main__":
    # Expected output:
    # 9.25
    # 11.0
    # 0
    print(calc_avg(data, "Length_of_stay", "SOFA", "lt", "15"))
    print(calc_avg(data, "Length_of_stay", "SOFA", "lt", "2"))
    print(calc_avg(data, "Length_of_stay", "SOFA", "lt", "0"))