慕的地8271018
# I copied the data you provided and then computed the distances. For each
# issuer, I find the nearest issuer and the distance to it. See the modified
# code below. Let me know if you need more details.
#
# Two issuers are only comparable when they share both the SIC code and the
# IPO year; the distance is the Euclidean distance in the
# (total assets, long term debt) plane.
import math

import numpy as np
import pandas as pd

issuers = [
    "Ryerson Tull Inc",
    "Siebel Sys Inc",
    "Travis Boats & Motors Inc",
    "Channell Commercial Corp",
    "Printware Inc",
    "AAA",
    "BBB",
    "ZZZ",
]
ttl_assets = [9322000.0, 995010.0, 313500.0, 426580.0, 145750.0, 299999.0, 399999.0, 123456.0]
long_term_debt = [2632000.0, 0.0, 43340.0, 3380.0, 0.0, 11111.0, 22222.0, 87500.0]
sic_code = [3661, 2456, 3661, 7483, 8473, 3661, 7483, 3661]
ipo_year = [1996, 1996, 1996, 1996, 1996, 1996, 1996, 1997]

data = pd.DataFrame(
    {
        "issuer": issuers,
        "total assets": ttl_assets,
        "long term debt": long_term_debt,
        "SIC-Code": sic_code,
        "IPO Year": ipo_year,
    }
)


def get_distance(x1, x2):
    """Return the Euclidean distance between two 2-D points, rounded to 3 decimals.

    Args:
        x1: Indexable with at least two numeric components; only [0] and [1] are read.
        x2: Same layout as ``x1``.

    Returns:
        The distance as a float rounded to three decimal places.
    """
    d = math.sqrt((x1[0] - x2[0]) ** 2 + (x1[1] - x2[1]) ** 2)
    return round(d, 3)


# Pull the columns out once so the pairwise loop works on plain arrays instead
# of doing a label lookup on the DataFrame for every cell.
n_issuers = len(data)
features = data[["total assets", "long term debt"]].to_numpy()
sic_codes = data["SIC-Code"].to_numpy()
ipo_years = data["IPO Year"].to_numpy()

# distMatrix[i, j] is the distance between issuers i and j when they are
# comparable, and +inf otherwise (including the diagonal), so a row-wise
# argmin never selects an issuer itself or a non-comparable issuer unless the
# row has no finite entry at all.
distMatrix = np.full((n_issuers, n_issuers), np.inf)
for i in range(n_issuers):
    for j in range(i + 1, n_issuers):
        if sic_codes[i] == sic_codes[j] and ipo_years[i] == ipo_years[j]:
            # The distance is symmetric, so compute it once and mirror it.
            distMatrix[i, j] = distMatrix[j, i] = get_distance(features[i], features[j])

listIssuers = data["issuer"].tolist()

# Row-wise reductions: entry k describes issuer k. The index picked for a row
# that is all +inf is meaningless and is masked out through has_match below.
arrMinDist = distMatrix.argmin(axis=1)
has_match = (distMatrix != np.inf).any(axis=1)

closest_issuers = [
    listIssuers[min_idx] if matched else np.nan
    for min_idx, matched in zip(arrMinDist, has_match)
]
closest_dists = np.where(
    has_match, distMatrix[np.arange(n_issuers), arrMinDist], np.nan
)

# Name-keyed lookups kept for code that reads them; the result columns below
# are assigned by position so duplicate issuer names cannot overwrite each other.
dictMinDistIssuer = dict(zip(listIssuers, closest_issuers))  # issuer -> closest issuer
dictMinDist = dict(zip(listIssuers, closest_dists))  # issuer -> distance to closest issuer

# Report frame: one row per issuer, one distance column per issuer, with the
# "not comparable" +inf marker shown as NaN. np.where builds a new array, so
# distMatrix itself keeps its +inf entries.
dfDist = pd.DataFrame(
    np.where(distMatrix == np.inf, np.nan, distMatrix), columns=listIssuers
)
dfDist.insert(0, "issuer", listIssuers)
dfDist.insert(1, "IPO Year", data["IPO Year"].to_numpy())
dfDist.insert(2, "SIC-Code", data["SIC-Code"].to_numpy())
dfDist["closest issuer"] = closest_issuers
dfDist["closest issuer dist"] = closest_dists