猿问

字符串后的正则表达式多行匹配

我正在尝试从文本文件中提取 CLAIM、EOB 和 COB 中的 PROCEDURE 部分。


并像这样创建一个对象


claim : [{PROCEDURE1}, {PROCEDURE2}, {PROCEDURE3}],

eob : [{PROCEDURE1}, {PROCEDURE2}, {PROCEDURE3}],

cob: [{PROCEDURE1}, {PROCEDURE2}, {PROCEDURE3}]

// Sample input: three top-level SEND sections (CLAIM, EOB, COB). Each one
// contains several PROCEDURE { ... } groups, and COB additionally nests a
// PRIME_EOB=SEND EOB section with its own PROCEDURE groups.
const data = `    SEND CLAIM {

       PREFIX="9403        "

       PROCEDURE { /* #1  */

          PROCEDURE_LINE="1"

          PROCEDURE_CODE="01201"

        

       }

       PROCEDURE { /* #2  */

          PROCEDURE_LINE="2"

          PROCEDURE_CODE="02102"

         

       }

       PROCEDURE { /* #3  */

          PROCEDURE_LINE="3"

          PROCEDURE_CODE="21222"

       

       }

    }

    

    SEND EOB {

          PREFIX="9403        "

          OFFICE_SEQUENCE="000721"

          PROCEDURE { /* #1 */

             PROCEDURE_LINE="1"

             ELIGIBLE="002750"

          }

          PROCEDURE { /* #2 */

             PROCEDURE_LINE="2"

             ELIGIBLE="008725"

          }

          PROCEDURE { /* #3 */

             PROCEDURE_LINE="3"

             ELIGIBLE="010200"

          }

    }

    

    SEND COB {

       PREFIX="TEST4       "

       OFFICE_SEQUENCE="000721"

       PROCEDURE { /* #1  */

          PROCEDURE_LINE="1"

          PROCEDURE_CODE="01201"

        

       }

       PROCEDURE { /* #2  */

          PROCEDURE_LINE="2"

          PROCEDURE_CODE="02102"

       }

       PROCEDURE { /* #3  */

          PROCEDURE_LINE="3"

          PROCEDURE_CODE="21222"

          DATE="19990104"

       }

       PRIME_EOB=SEND EOB {

          PREFIX="9403        "

          OFFICE_SEQUENCE="000721"

          PROCEDURE { /* #1 */

             PROCEDURE_LINE="1"

             ELIGIBLE="002750"

          }

          PROCEDURE { /* #2 */

             PROCEDURE_LINE="2"

             ELIGIBLE="008725"

          }

          PROCEDURE { /* #3 */

             PROCEDURE_LINE="3"

             ELIGIBLE="010200"

          }

    

       }

    }`;

// Pattern for a single PROCEDURE group:
//   group 1 - a line start (m flag) followed by whitespace and "PROCEDURE {";
//             \s+ also consumes newlines, so preceding blank lines are included
//   group 2 - lazily, every character up to the first closing brace
//   (?:})   - the closing brace itself, not captured
const re = /(^\s+PROCEDURE\s\{)([\S\s]*?)(?:})/gm;

// With the g flag, match() returns only the full text of every match, so all
// PROCEDURE groups come back in one flat array with no indication of which
// SEND section they belong to.
const procedureBlocks = data.match(re);
console.log(procedureBlocks);


到目前为止,这是我尝试过的 (^\s+PROCEDURE\s\{)([\S\s]*?)(?:}),但我不知道如何将 PROCEDURE 分组到 CLAIM 或 EOB 这样的键之下。


绝地无双
浏览 157回答 3
3回答

慕的地6264312

对于“声明”,您可以匹配以下正则表达式。/(?<=^ *SEND CLAIM +\{\r?\n(?:^(?! *SEND EOB *\{)(?! *SEND COB *\{).*\r?\n)*^ *PROCEDURE *)\{[^\}]*\}/声明正则表达式这与以下字符串匹配,我认为可以轻松地将其保存到带有少量 Javascript 代码的数组中。&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;{ /* CLAIM #1&nbsp; */&nbsp;&nbsp;&nbsp; &nbsp;PROCEDURE_LINE="1"&nbsp; &nbsp;PROCEDURE_CODE="01201"&nbsp; &nbsp;&nbsp;}&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; { /* CLAIM #2&nbsp; */&nbsp; &nbsp;PROCEDURE_LINE="2"&nbsp; &nbsp;PROCEDURE_CODE="02102"&nbsp;&nbsp;}&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; { /* CLAIM #3&nbsp; */&nbsp; &nbsp;PROCEDURE_LINE="3"&nbsp; &nbsp;PROCEDURE_CODE="21222"&nbsp; &nbsp;}Javascript 的正则表达式引擎执行以下操作。(?<=&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;: begin positive lookbehind&nbsp; ^&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : match beginning of line&nbsp; \ *SEND CLAIM\ +&nbsp; &nbsp;: match 'SEND CLAIM' surrounded by 0+ spaces&nbsp; \{\r?\n&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : match '{' then line terminators&nbsp; (?:&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : begin non-capture group&nbsp; &nbsp; ^&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : match beginning of line&nbsp; &nbsp; (?!&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : begin negative lookahead&nbsp; &nbsp; &nbsp; \ *SEND EOB\ * : match 'SEND EOB' surrounded by 0+ spaces&nbsp; &nbsp; &nbsp; \{&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;: match '{'&nbsp; &nbsp; )&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : end negative lookahead&nbsp; &nbsp; (?!&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : begin negative lookahead&nbsp; &nbsp; &nbsp; \ *SEND COB\ * : match 'SEND COB' surrounded by 0+ spaces&nbsp; &nbsp; &nbsp; \{&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;: match '{'&nbsp; &nbsp; )&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : end negative lookahead&nbsp; &nbsp; .*\r?\n&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : match line including terminators&nbsp; )&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 
&nbsp; &nbsp; &nbsp; &nbsp; : end non-capture group&nbsp; *&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : execute non-capture group 0+ times&nbsp; ^&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : match beginning of line&nbsp; \ *PROCEDURE\ *&nbsp; &nbsp; : match 'PROCEDURE' surrounded by 0+ spaces&nbsp;)&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; : end positive lookbehind\{[^\}]*\}&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;: match '{', 0+ characters other than '}', '}'&nbsp;我已经在上面转义了空格字符以提高可读性。对于“eob”,使用稍微修改的正则表达式:/(?<=^ *SEND EOB +\{\r?\n(?:^(?! *SEND CLAIM *\{)(?! *SEND COB *\{).*\r?\n)*^ *PROCEDURE *)\{[^\}]*\}/EOB 正则表达式我没有尝试对“cob”做同样的事情,因为该部分的结构与“claim”和“eob”不同,我不清楚如何处理它。最后一点,如果不是很明显:使用带有循环的约定代码和可能的简单正则表达式来提取感兴趣的字符串要容易得多,但我希望一些读者可能会发现我的回答对正则表达式的某些元素具有指导意义.

回首忆惘然

CLAIM、EOB 和 COB 会始终保持相同的顺序吗?如果是这样,您可以在使用已有的正则表达式之前拆分文本:const procRegex = /(^\s+PROCEDURE\s\{)([\S\s]*?)(?:})/gm;let claimData = data.split("EOB")[0];let claimProcedures = claimData.match(procRegex);let eobData = data.split("COB")[0].split("EOB")[1];let eobProcedures = eobData.match(procRegex);let cobData = data.split("COB")[1];let cobProcedures = cobData.match(procRegex);// If you want to leave out the PRIME_EOB, you can split COB againcobData = cobData.split("EOB")[0];cobProcedures = cobData.match(procRegex);console.log(claimProcedures);输出:[&nbsp; '&nbsp; &nbsp; &nbsp; &nbsp;PROCEDURE { /* #1&nbsp; */\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; PROCEDURE_LINE="1"\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; PROCEDURE_CODE="01201"\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; \n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp;}',&nbsp; '&nbsp; &nbsp; &nbsp; &nbsp;PROCEDURE { /* #2&nbsp; */\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; PROCEDURE_LINE="2"\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; PROCEDURE_CODE="02102"\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp;}',&nbsp; '&nbsp; &nbsp; &nbsp; &nbsp;PROCEDURE { /* #3&nbsp; */\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; PROCEDURE_LINE="3"\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; PROCEDURE_CODE="21222"\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp;\n' +&nbsp; &nbsp; '&nbsp; &nbsp; &nbsp; &nbsp;}']演示作为一种替代方法,您的数据与有效的 JSON 相距不远,因此您可以使用它来运行。下面的代码将数据转换为 JSON,然后将其解析为 Javascript 对象,您可以随意使用该对象。/* data cannot have Javascript comments in it for this to work, or you need&nbsp; &nbsp;another regex to remove them */data = data.replace(/=/g, ":") // replace = with :&nbsp; .replace(/\s?{/g, ": {") // replace { with : {&nbsp; .replace(/SEND/g, "") // remove "SEND"&nbsp; .replace(/\"\s*$(?!\s*\})/gm, "\",") // add commas after object properties&nbsp; .replace(/}(?=\s*\w)/g, "},") // add commas after objects&nbsp; 
.replace(/(?<!\}),\s*PROCEDURE: /g, ",\nPROCEDURES: [") // start procedures list&nbsp; .replace(/(PROCEDURE:[\S\s]*?\})\s*(?!,\s*PROCEDURE)/g, "$1]\n") // end list&nbsp; .replace(/PROCEDURE: /g, "") // remove "PROCEDURE"&nbsp; .replace("PRIME_EOB: EOB:", "PRIME_EOB:") // replace double key with single key. Is this the behavior you want?&nbsp; .replace(/(\S*):/g, "\"$1\":") // put quotes around object key nameslet dataObj = JSON.parse("{" + data + "}");console.log(dataObj.CLAIM.PROCEDURES);输出:[ { PROCEDURE_LINE: '1', PROCEDURE_CODE: '01201' },&nbsp; { PROCEDURE_LINE: '2', PROCEDURE_CODE: '02102' },&nbsp; { PROCEDURE_LINE: '3', PROCEDURE_CODE: '21222' } ]

Qyouu

您要做的是为文本文件中使用的语法编写一个解析器。如果看一下语法,它看起来很像 JSON。我建议使用正则表达式修改语法以获得有效的 JSON 语法并使用 JavaScript JSON 解析器对其进行解析。解析器能够处理递归。最后,您将拥有一个 JavaScript 对象,该对象允许您删除或添加您需要的任何内容。此外,源的层次结构将被保留。此代码为提供的示例完成了工作:let data = `&nbsp; &nbsp; SEND CLAIM {// your text file contents}`;// handle PRIME_EOB=SEND EOB {var regex = /(\w+)=\w+.*{/gm;var replace = data.replace(regex, "$1 {");// append double quotes in lines like PROCEDURE_LINE="1"var regex = /(\w+)=/g;var replace = replace.replace(regex, "\"$1\": ");// append double quotes in lines like PROCEDURE {var regex = /(\w+.*)\s{/g;var replace = replace.replace(regex, "\"$1\": {");// remove comments: /* */var regex = /\/\**.*\*\//g;var replace = replace.replace(regex, "");// append commas to lines i.e. "PROCEDURE_LINE": "2"var regex = /(\".*\":\s*\".*\")/gm;var replace = replace.replace(regex, "$1,");// append commas to '}'var regex = /^.*}.*$/gm;var replace = replace.replace(regex, "},");// remove trailing commasvar regex = /\,(?!\s*?[\{\[\"\'\w])/g;var replace = replace.replace(regex, "");// surround with {}replace = "{" + replace + "}";console.log(replace);var obj = JSON.parse(replace);console.log(obj);JSON 看起来像这个片段:{&nbsp; &nbsp; "SEND CLAIM": {&nbsp; &nbsp; &nbsp; &nbsp;"PREFIX": "9403&nbsp; &nbsp; &nbsp; &nbsp; ",&nbsp; &nbsp; &nbsp; &nbsp;"PROCEDURE": {&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; "PROCEDURE_LINE": "1",&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; "PROCEDURE_CODE": "01201"&nbsp; &nbsp; &nbsp; &nbsp;&nbsp;},&nbsp; &nbsp; &nbsp; &nbsp;"PROCEDURE": {&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; "PROCEDURE_LINE": "2",&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; "PROCEDURE_CODE": "02102"最终对象像这样出现在调试器中我并不完全清楚你的最终数组或对象应该是什么样子。但从这里开始,我预计只需很少的努力就能产生你想要的东西。
随时随地看视频慕课网APP

相关分类

JavaScript
我要回答