
如何使用此 CFG 随机生成字符串?




[['_S', ['_NP _VP']], ['_NP', ['_Det _Adj _N', '_Det _N', '_Adj _PropN', '_PropN']], ['_VP', ['_Vi', '_Vt _NP', '_Vc _Comp _S']]]

[['_Det', ['the', 'a', 'some', 'any', 'every']], ['_Adj', ['green', 'young', 'tired', 'confused']], ['_N', ['dog', 'cat']], ['_PropN', ['John', 'Mary']], ['_Vi', ['sleeps', 'walks']], ['_Vt', ['loves', 'hates']], ['_Vc', ['says', 'thinks', 'believes']], ['_Comp', ['that']]]


import random

# Phrase-structure rules of the CFG, one production per line in the form
# "LHS → alternative | alternative ...".
# Parenthesised implicit string concatenation is used instead of backslash
# continuations: a backslash only joins the line that directly follows it,
# so a blank line after it terminates the statement and the next indented
# string literal no longer parses.
psg_rules_str = (
    "S → NP VP\n"
    "NP → Det Adj N | Det N | Adj PropN | PropN\n"
    "VP → Vi | Vt NP | Vc Comp S"
)

# Lexical rules: each pre-terminal category and the words it may rewrite to.
terminals_str = (
    "Det → the | a | some | any | every\n"
    "Adj → green | young | tired | confused\n"
    "N → dog | cat\n"
    "PropN → John | Mary\n"
    "Vi → sleeps | walks\n"
    "Vt → loves | hates\n"
    "Vc → says | thinks | believes\n"
    "Comp → that"
)


def _parse_productions(grammar_text, *, tag_rhs):
    """Parse "LHS → a | b" lines into ``[['_LHS', [alt, ...]], ...]``.

    Args:
        grammar_text: Productions separated by newlines. Each non-blank line
            must contain a "→" between the left-hand side and its
            "|"-separated alternatives.
        tag_rhs: When true, every whitespace-separated symbol inside each
            alternative is prefixed with "_" (right-hand sides made of
            category symbols). When false, alternatives are only stripped
            (right-hand sides made of plain words).

    Returns:
        A list of two-element lists ``[tagged_lhs, alternatives]``, in the
        order the productions appear in ``grammar_text``.

    Raises:
        ValueError: If a non-blank line contains no "→".
    """
    productions = []
    for line in grammar_text.split("\n"):
        if not line.strip():
            continue
        lhs, arrow, rhs = line.partition("→")
        if not arrow:
            raise ValueError(f"production without '→': {line!r}")
        alternatives = [alt.strip() for alt in rhs.split("|")]
        if tag_rhs:
            # Split on any run of whitespace so that doubled spaces cannot
            # produce a bare "_" symbol.
            alternatives = [
                " ".join("_" + symbol for symbol in alt.split())
                for alt in alternatives
            ]
        productions.append(["_" + lhs.strip(), alternatives])
    return productions


# [['_S', ['_NP _VP']],
#  ['_NP', ['_Det _Adj _N', '_Det _N', '_Adj _PropN', '_PropN']],
#  ['_VP', ['_Vi', '_Vt _NP', '_Vc _Comp _S']]]
psg_rules_list = _parse_productions(psg_rules_str, tag_rhs=True)

# [['_Det', ['the', 'a', 'some', 'any', 'every']], ..., ['_Comp', ['that']]]
terminals_list = _parse_productions(terminals_str, tag_rhs=False)


浏览 68回答 1


你的程序已经差不多可以运行了。以下是补全 reachTerminals 函数的一种方法:

import random

psg_rules_str = "S → NP VP\n" \
                "NP → Det Adj N | Det N | Adj PropN | PropN\n" \
                "VP → Vi | Vt NP | Vc Comp S"

terminals_str = "Det → the | a | some | any | every\n" \
                "Adj → green | young | tired | confused\n" \
                "N → dog | cat\n" \
                "PropN → John | Mary\n" \
                "Vi → sleeps | walks\n" \
                "Vt → loves | hates\n" \
                "Vc → says | thinks | believes\n" \
                "Comp → that"

psg_rules_list = [a.split("→") for a in psg_rules_str.split("\n")]
for p in psg_rules_list:
    p[0] = "_" + p[0].strip()
    p[1] = p[1].split("|")
    p[1] = ["_" + a.strip().replace(" ", " _") for a in p[1]]

terminals_list = [a.split("→") for a in terminals_str.split("\n")]
for t in terminals_list:
    t[0] = "_" + t[0].strip()
    t[1] = t[1].split("|")
    t[1] = [a.strip() for a in t[1]]

def reachTerminals(from_nts, with_rules, with_ts):
    from_nts = str.upper("_" + from_nts.replace("_", "").strip().replace(" ", " _"))
    rule_tags = [a[0] for a in with_rules]
    ts_tags = [a[0] for a in with_ts]
    nts_todo = [a for a in rule_tags if a in from_nts]
    while nts_todo:
        for tag in nts_todo:
            wr_index = rule_tags.index(tag)
            repl_choices = with_rules[wr_index][1]
            choice = random.choice(repl_choices)
            from_nts = from_nts.replace(tag, choice, 1)
        nts_todo = [a for a in rule_tags if a in from_nts]
    ts_todo = [a for a in ts_tags if a in from_nts]
    while ts_todo:
        for tag in ts_todo:
            wr_index = ts_tags.index(tag)
            repl_choices = with_ts[wr_index][1]
            choice = random.choice(repl_choices)
            from_nts = from_nts.replace(tag, choice, 1)
        ts_todo = [a for a in ts_tags if a in from_nts]
    return from_nts

print(reachTerminals(from_nts = "s", with_rules = psg_rules_list, with_ts = terminals_list))

这里用到的两个重要工具是 random.choice 函数,以及 str.replace 函数的第三个参数——它让你只替换子字符串的第一个匹配项。我还没有彻底测试这段代码,但它似乎按预期工作。输出示例:

green John loves some confused dog
Mary says that the tired dog says that some green cat hates some cat
every green dog loves young John
John loves the tired cat

