如何使用iTextSharp获取文本格式
我正在使用iTextSharp从PDF中读取文本内容。文本本身可以读取到,但丢失了文字格式,例如字体、颜色等。有没有办法获取这些格式信息?
以下是我用于提取文本的代码段:
PdfReader reader = new PdfReader("F:\\EBooks\\AspectsOfAjax.pdf");textBox1.Text = ExtractTextFromPDFBytes(reader.GetPageContent(1));private string ExtractTextFromPDFBytes(byte[] input){ if (input == null || input.Length == 0) return ""; try { string resultString = ""; // Flag showing if we are we currently inside a text object bool inTextObject = false; // Flag showing if the next character is literal e.g. '\\' to get a '\' character or '\(' to get '(' bool nextLiteral = false; // () Bracket nesting level. Text appears inside () int bracketDepth = 0; // Keep previous chars to get extract numbers etc.: char[] previousCharacters = new char[_numberOfCharsToKeep]; for (int j = 0; j < _numberOfCharsToKeep; j++) previousCharacters[j] = ' '; for (int i = 0; i < input.Length; i++) { char c = (char)input[i]; if (inTextObject) { // Position the text if (bracketDepth == 0) { if (CheckToken(new string[] { "TD", "Td" }, previousCharacters)) { resultString += "\n\r"; } else { if (CheckToken(new string[] {"'", "T*", "\""}, previousCharacters)) { resultString += "\n"; } else { if (CheckToken(new string[] { "Tj" }, previousCharacters)) { resultString += " "; } } } }