在Perl中解析Excel文件的最佳方式是什么?
答
最好的办法是使用Spreadsheet::ParseExcel。
下面是一个例子:
#!/usr/bin/perl -w
use strict;
use warnings;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
for my $worksheet ($workbook->worksheets()) {
my ($row_min, $row_max) = $worksheet->row_range();
my ($col_min, $col_max) = $worksheet->col_range();
for my $row ($row_min .. $row_max) {
for my $col ($col_min .. $col_max) {
my $cell = $worksheet->get_cell($row, $col);
next unless $cell;
print "Row, Col = ($row, $col)\n";
print "Value = ", $cell->value(), "\n";
print "Unformatted = ", $cell->unformatted(), "\n";
print "\n";
}
}
}
要转换Excel文件使用Perl的文本,我建议excel2txt它使用电子表格:: ParseExcel的。
答
@echo off
ECHO CHECK THE VERSION FROM THE PROJECT
echo see also this [link][1]
for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i
ECHO The _Version is %_Version%
ECHO remove the output html files
del *.html /q
ECHO remove the output log files
del *.log /q
::pause
ECHO %0 > %0.log
ECHO %0.error.log >%0.error.log
set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version%
echo BaseDir is %BaseDir% 1>>%0.log 2>>%0.error.log
ECHO.
set LogLevel=3
echo LogLevel is %LogLevel% 1>>%0.log 2>>%0.error.log
ECHO.
::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls"
SET ExcelFileToParse="%BaseDir%\TODO.xls"
echo ExcelFileToParse is %ExcelFileToParse% 1>>%0.log 2>>%0.error.log
echo.
set OutputDir=%BaseDir%
echo OutputDir is %Outputdir% 1>>%0.log 2>>%0.error.log
echo.
ECHO SET THE UNICODE FOR PERL FOR UTF-8
SET PERL_UNICODE=S
ECHO %%PERL_UNICODE%% IS %PERL_UNICODE%
::set PerlScript=parseExcelToCsv.pl
set PerlScript=ExcelToHtml.pl
echo PerlScript is %PerlScript% 1>>%0.log 2>>%0.error.log
ECHO.
echo Action !!! 1>>%0.log 2>>%0.error.log
echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log
perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log
%0.error.log
%0.log
::debug pause
use strict;
use Spreadsheet::ParseExcel;
use utf8 ;
use Encode ;
package ExcelToHtml ;
my $DebugLevel = 3 ;
print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n";
my $minimumAmountOfCommandLineArgs = 3 ;
my ($ExcelFileToParse , $OutputDir , $BaseFileName) =();
sub main {
logMsg("$0 SCRIPT START " , 1);
#Action !!! call here methods
checkCommandLineArgs();
buildNames();
ParseExcelAndlogMsgFiles ($ExcelFileToParse , $BaseFileName , $OutputDir) ;
logMsg("$0 SCRIPT STOP " , 1);
} #eof MAIN
sub buildNames
{
$ExcelFileToParse = "$ARGV[1]" ;
$OutputDir="$ARGV[2]" ;
$BaseFileName = $ExcelFileToParse ;
logMsg("\$ExcelFileToParse is $ExcelFileToParse") ;
logMsg ("\$OutputDir is $OutputDir") ;
logMsg ("\$BaseFileName is $BaseFileName \n");
$BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/; #strip the directory part
logMsg ("\$BaseFileName is $BaseFileName ");
$BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ; #strip the file extension
logMsg ("\$BaseFileName is $BaseFileName ");
} #eof sub buildNames
sub ParseExcelAndlogMsgFiles {
my $ExcelFileToParse = shift ;
my $BaseFileName = shift ;
my $OutputDir = shift ;
my $strToReturn = "";
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->Parse("$ExcelFileToParse");
for my $worksheet ($workbook->worksheets()) {
my ($row_min, $row_max) = $worksheet->row_range();
my ($col_min, $col_max) = $worksheet->col_range();
for my $row ($row_min .. $row_max) {
my $rowStr = "" ;
for my $col ($col_min .. $col_max) {
my $cell = $worksheet->get_cell($row, $col);
next unless $cell;
logMsg (" Row, Col = ($row, $col)") ;
logMsg ("\$cell->encoding() is " . $cell->encoding()) ;
logMsg (" Value = ", $cell->value());
$rowStr .= makeCell($cell->value()) ; #The Value
logMsg ("Unformatted = ", $cell->unformatted()) ;
} #eof col
$rowStr = makeRow($rowStr);
$strToReturn .= $rowStr ;
} #eof for my row
my $worksheetName = $worksheet->{'Name'} ;
my $FileSheet = "$OutputDir/$BaseFileName" . '.' . $worksheetName . '.' . 'html' ;
$strToReturn = makeTable ($strToReturn);
$strToReturn = makeFile ($strToReturn);
logMsg ("\$strToReturn is $strToReturn ") ;
PrintToFile ($FileSheet, $strToReturn) ;
$strToReturn = "" ;
$FileSheet = "" ;
} #eof for my worksheet
} #eof sub
sub trim
{
$_[0]=~s/^\s+//;
$_[0]=~s/\s+$//;
return $_[0];
}
# =========================================== eof sub trim
# Action !!!
sub makeRow
{
my $row = shift ;
return "<tr>" . $row . "</tr> \n" ;
}
sub makeCell
{
my $cell = shift ;
return "<td>" . "$cell" . " </td>" ;
}
sub makeTable
{
my $table = shift ;
return "\n <table> " . $table . "</table> \n" ;
}
sub makeFile
{
my $file = shift ;
$file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ;
return $file ;
}
sub checkCommandLineArgs
{
if (@ARGV != $minimumAmountOfCommandLineArgs)
{
logErrorMsg ("Not enougn command line args supplied exit 1" , 1);
die "usage: $0 1 args \n" ;
exit 1 ;
} #eof if
else
{
foreach (@ARGV) { logMsg ("$_ ") ; }
} #eof else if (@ARGV != 4)
} #eof sub checkCommandLineArgs
#log a message according to the DebugLevel
sub logMsg
{
my $msg = shift ;
my $importance = shift ;
my $CurrentPerlScriptName = "$0" ;
my $niceMonth = GetANiceMonth();
my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" . '.log' ;
$msg = GetANiceTime() .' --- ' . $msg . " \n" ;
if ($importance == 1)
{
$msg = "\n============================================================ \n" . $msg ;
$msg = $msg . "============================================================ \n" ;
}
#debug nothing
if ($DebugLevel == 0) { return ; }
#just logMsg the message
if ($DebugLevel == 1)
{
logMsg ($msg);
} #eof if ($DebugLevel == 1)
#logMsg the message in
if ($DebugLevel == 2)
{
#READ ALL ROWS OF A FILE TO ALIST
open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n";
print LOG $msg ;
close LOG;
}
#logMsg the message in
if ($DebugLevel == 3)
{
#READ ALL ROWS OF A FILE TO ALIST
open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n";
print LOG $msg ;
close LOG;
print $msg ;
} #eof if ($DebugLevel == 3)
} #eof sub logMsg
#log a message according to the DebugLevel
sub logErrorMsg
{
my $errorMsg = shift ;
my $importance = shift ;
my $CurrentPerlScriptName = "$0" ;
my $niceMonth = GetANiceMonth();
my $errorLogFile = "$CurrentPerlScriptName" . "_" . "$niceMonth" . '.error.log' ;
$errorMsg = GetANiceTime() .' --- ' . $errorMsg . " \n" ;
if ($importance == 1)
{
$errorMsg = "\n============================================================ \n" . $errorMsg ;
$errorMsg= $errorMsg. "============================================================ \n" ;
}
#debug nothing
if ($DebugLevel == 0) { return ; }
#just logMsg the message
if ($DebugLevel == 1) { print $errorMsg ; }
#logMsg the message in a error log file
if ($DebugLevel == 2)
{
#READ ALL ROWS OF A FILE TO ALIST
open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n";
print ERRLOG $errorMsg ;
close ERRLOG;
}
#logMsg the message in
if ($DebugLevel == 3)
{
#READ ALL ROWS OF A FILE TO ALIST
open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n";
print ERRLOG $errorMsg ;
close ERRLOG;
print $errorMsg ;
} #eof if ($DebugLevel == 3)
} #eof sub logErrorMsg
#GET A NICE TIME
sub GetANiceTime {
# Purpose: returns the time in yyyymmdd-format
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
#---- change 'month'- and 'year'-values to correct format ----
$min = "0$min" if ($min < 10);
$hour = "0$hour" if ($hour < 10);
$mon = $mon + 1;
$mon = "0$mon" if ($mon < 10);
$year = $year + 1900;
$mday = "0$mday" if ($mday < 10);
return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec";
} #eof sub GetANiceTime
sub GetANiceMonth {
# Purpose: returns the time in yyyymmdd-format
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
#---- change 'month'- and 'year'-values to correct format ----
$mon = $mon + 1;
$mon = "0$mon" if ($mon < 10);
$year = $year + 1900;
return "$year\.$mon" ;
} #eof sub GetANiceTime
sub PrintToFile {
my $FileOutput = shift ;
my $StringToPrint = shift ;
#READ ALL ROWS OF A FILE TO ALIST
open (FILEOUTPUT, ">$FileOutput") ||
print "could not open the \$FileOutput $FileOutput!\n";
print FILEOUTPUT $StringToPrint ;
close FILEOUTPUT ;
#debug $strToReturn .= $StringToPrint;
}
# =========================================== eof sub PrintToFile
#Action !!!
main();
1 ;
__END__
#VersionHistory:
#1.0. --- Yordan Georgiev --- Initial creation
#1.1 --- Yordan Georgiev --- Added conditional logging
+0
这是批量文件和perl的大混乱。你可以清理一下,并解释一下每个部分是什么 – 2016-12-31 02:55:28
+1
这段代码太旧了,我真的不得不谷歌我的Gmail找到它: https://code.google.com/archive/p/exceltohtml/downloads – 2016-12-31 08:14:18
答
http://search.cpan.org/dist/Spreadsheet-Read/Read.pm
电子表格::阅读尝试透明阅读任何电子表格,并在独立的解析模块来完成实际的电子表格扫描的通用的方式返回其内容。
我对它的Ruby端口也有好运 – 2009-01-09 18:41:39