% Conference paper: identifying and removing company logos and graphic displays
% with broken boundaries from scanned mixed documents before OCR.
% The citation key is left exactly as found so that existing citations resolve.
% booktitle carries the full proceedings title; no series field is set because
% the volume does not appear to belong to a numbered series (TODO confirm).
% The abstract is reproduced verbatim from the source record.
@inproceedings{451e5307c5f34074934c70aede09198e,
author = "Hashemi, Ray R. and Epperson, Charlie and Jones, Steve and Jin, Lei and Talburt, John",
title = "Identification and removal of extraneous graphics in a commercial {OCR} operation",
booktitle = "Multimedia, Image Processing and Soft Computing: Trends, Principles and Applications - Proceedings of the 5th Biannual World Automation Congress, {WAC} 2002, {ISSCI} 2002 and {IFMIP} 2002",
year = "2002",
pages = "389--394",
isbn = "1889335185",
language = "English",
keywords = "Document analysis, Image enhancement, OCR, Pattern recognition, Text mining",
abstract = "The major issue in OCRing of a document that is composed of a mixture of text and graphics (i.e. a mixed document) is the presence of graphics in the document. In this research efforts we propose two algorithms for identification and removal of two special types of graphics, namely, company logos and graphic displays with broken boundaries. A prototype is built and its performance evaluated on a test set of 198 scanned images of mixed documents. The prototype was able to remove 100% of the two types of graphics from the images.",
note = "4th International Symposium on Soft Computing for Industry, ISSCI 2002 and the 3rd International Forum on Multimedia and Image Processing, IFMIP 2002, held within the World Automation Congress, WAC 2002. Conference dates: 9--13 June 2002",
}