如何使用iTextSharp进行文本格式化
我正在使用iTextSharp从PDF中读取文本内容，并且已经能够读取到文本。但文字格式（如字体、颜色等）丢失了。有没有办法获取这些格式？以下是我用于提取文本的代码段：
PdfReader reader = new PdfReader("F:\\EBooks\\AspectsOfAjax.pdf");
textBox1.Text = ExtractTextFromPDFBytes(reader.GetPageContent(1));
private string ExtractTextFromPDFBytes(byte[] input){
if (input == null || input.Length == 0) return "";
try
{
string resultString = "";
// Flag showing if we are currently inside a text object
bool inTextObject = false;
// Flag showing if the next character is literal e.g. '\\' to get a '\' character or '\(' to get '('
bool nextLiteral = false;
// () Bracket nesting level. Text appears inside ()
int bracketDepth = 0;
// Keep previous chars to extract numbers etc.:
char[] previousCharacters = new char[_numberOfCharsToKeep];
for (int j = 0; j < _numberOfCharsToKeep; j++) previousCharacters[j] = ' ';
for (int i = 0; i < input.Length; i++)
{
char c = (char)input[i];
if (inTextObject)
{
// Position the text
if (bracketDepth == 0)
{
if (CheckToken(new string[] { "TD", "Td" }, previousCharacters))
{
resultString += "\n\r";
}
else
{
if (CheckToken(new string[] {"'", "T*", "\""}, previousCharacters))
{
resultString += "\n";
}
else
{
if (CheckToken(new string[] { "Tj" }, previousCharacters))
{
resultString += " ";
}
}
}
}
2 回答
- 2 回答
- 0 关注
- 680 浏览
添加回答
举报
0/150
提交
取消