Tuesday, November 11, 2008

Strip text from a PDF

Add references to the following assemblies, all of which ship with PDFBox:
IKVM.GNU.Classpath
IKVM.Runtime
PDFBox-0.7.3


Imports System.io
Imports org.pdfbox.pdmodel
Imports org.pdfbox.util

''' <summary>
''' Form that lets the user pick a file and displays the text PDFBox extracts from it.
''' </summary>
Public Class Form1

    ''' <summary>
    ''' Shows the open-file dialog and, if the user confirms a selection, loads the
    ''' chosen file with PDFBox and puts the extracted text into TextBox1.
    ''' </summary>
    ''' <param name="sender">The control that raised the event.</param>
    ''' <param name="e">Event data (unused).</param>
    ''' <remarks>
    ''' Exceptions thrown by PDFBox while loading or parsing are not caught here and
    ''' propagate to the caller, as before.
    ''' </remarks>
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        OpenFileDialog1.InitialDirectory = "c:\"
        OpenFileDialog1.Filter = "txt files (*.txt)|*.txt|All files (*.*)|*.*|pdf files (*.pdf)|*.pdf"
        ' FilterIndex is 1-based, so 2 pre-selects the "All files" entry.
        OpenFileDialog1.FilterIndex = 2
        OpenFileDialog1.RestoreDirectory = True

        ' Nothing to load when the dialog is cancelled; previously the load below ran
        ' anyway with whatever FileName the dialog happened to hold.
        If OpenFileDialog1.ShowDialog() <> DialogResult.OK Then
            Return
        End If

        Dim selectedPath As String = OpenFileDialog1.FileName
        ' Show the chosen path first; it is replaced by the extracted text on success.
        Me.TextBox1.Text = selectedPath

        Dim doc As PDDocument = PDDocument.load(selectedPath)
        Try
            Dim stripper As New org.pdfbox.util.PDFTextStripper()
            Me.TextBox1.Text = stripper.getText(doc)
        Finally
            ' Close the document even if extraction fails so the underlying file
            ' is released.
            doc.close()
        End Try
    End Sub
End Class

No comments: