qq_遁去的一_1
"""aggregate_samples.py -- merge every ``sample.csv`` under a directory.

The key techniques: ``os.walk()`` to search folders recursively, the ``csv``
module to read each ``sample.csv`` (taking its second column), lists to
accumulate the samples, and ``csv`` again to write the result out.

The ``sample.csv`` files are expected to have different lengths, so shorter
columns are padded with empty cells.  Everything is held in memory, which
assumes the data set is small enough to fit; if it is not, more work is
needed.

Example session::

    $ cd ~
    $ more aggregate.csv
    X
    X
    X
    X
    X
    X
    $ more ./Desktop/folder/sub-folder/sample.csv
    A,1
    A,2
    A,3
    A,4
    A,5
    $ more ./Desktop/folder/sub-folder/sub-sub-folder/sample.csv
    B,6
    B,7
    B,8
    B,9
    $ more ./Desktop/folder/sub-folder2/sample.csv
    C,10
    C,11
    C,12
    C,13
    C,14
    C,15
    C,16
    $ more ./Desktop/folder/sub-folder3/sub-sub-folder/sample.csv
    D,17
    D,18
    D,19
    $ python3 aggregate_samples.py ./Desktop
    ./Desktop/folder/sub-folder/sample.csv
    ./Desktop/folder/sub-folder/sub-sub-folder/sample.csv
    ./Desktop/folder/sub-folder2/sample.csv
    ./Desktop/folder/sub-folder3/sub-sub-folder/sample.csv
    $ cat aggregate.csv
    X,1,6,10,17
    X,2,7,11,18
    X,3,8,12,19
    X,4,9,13,
    X,5,,14,
    X,,,15,
    ,,,16,
"""
import os
import sys
import argparse
import csv
from itertools import zip_longest

AGGREGATE_FILENAME = 'aggregate.csv'
SAMPLE_FILENAME = 'sample.csv'


def _load_existing_columns(path):
    """Return the columns already stored in the CSV file at *path*.

    Returns an empty list when the file does not exist.  An existing file
    always contributes at least one (possibly empty) column; ragged rows are
    padded with empty cells so every returned column has one entry per row.
    """
    try:
        # newline='' lets the csv module do its own newline handling.
        with open(path, newline='') as f:
            rows = list(csv.reader(f))
    except FileNotFoundError:
        # Doesn't exist; it is created when the result is written.
        return []
    width = max(1, max((len(row) for row in rows), default=0))
    return [
        [row[i] if i < len(row) else '' for row in rows]
        for i in range(width)
    ]


def _iter_sample_files(root):
    """Yield the path of every ``sample.csv`` found beneath *root*."""
    for dirpath, subdirs, files in os.walk(root):
        # Sorting in place makes os.walk descend in a stable order.
        subdirs.sort()
        if SAMPLE_FILENAME in files:
            yield os.path.join(dirpath, SAMPLE_FILENAME)


def _read_second_column(path):
    """Return the second field of each non-blank row of the CSV at *path*.

    Blank lines are skipped; a row with fewer than two fields yields ''.
    """
    with open(path, newline='') as f:
        return [
            row[1] if len(row) > 1 else ''
            for row in csv.reader(f, delimiter=',')
            if row
        ]


def main(options):
    """Append one column per ``sample.csv`` to ``aggregate.csv``.

    Args:
        options: Parsed command-line options; ``options.directory`` is the
            root directory that is searched recursively.

    Returns:
        0, for use as the process exit status.

    ``aggregate.csv`` is read from and written to the current working
    directory.  Columns already present in it are kept on the left.  The
    path of each ``sample.csv`` is printed as it is processed.
    """
    columns = _load_existing_columns(AGGREGATE_FILENAME)

    for file_path in _iter_sample_files(options.directory):
        print(file_path)
        columns.append(_read_second_column(file_path))

    # Transpose columns into rows, padding short columns with ''.  The row
    # count follows the longest column, including pre-existing ones.
    rows = zip_longest(*columns, fillvalue='')

    with open(AGGREGATE_FILENAME, 'w', newline='') as aggregate:
        writer = csv.writer(aggregate, delimiter=',')
        writer.writerows(rows)
    return 0


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Directory path.')
    options = parser.parse_args()
    sys.exit(main(options))