- :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
- :: cn-dos.cmd -V0.1 -- DOS联盟论坛收贴小虫
- :: 无奈何 - 2006-11-26 - CMD & GAWK
- :: 用法:cn-dos [主题ID] [/h 显示帮助]
- :: 支持文件: - gawk.exe wget.exe concmd.exe htox32c.exe
- :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
- @echo off
- setlocal
- :: Quoted full path of this script; the main loop passes it to gawk, which
- :: extracts the AWK program stored between the "#<-1" and "#>-1" marker lines.
- set self="%~f0"
- :: Let the helper tools be found next to the script or in the starting directory.
- :: The assignment is quoted so that characters such as "&" in PATH stay literal.
- set "path=%path%;%~dp0;%cd%"
- set nofile=
- :: Dependency check, part 1: tools whose exact file name is known.
- :: %%~$PATH:i expands to the first match along PATH, or to nothing.
- for %%i in (gawk.exe wget.exe) do (
-     if "%%~$PATH:i" == "" (
-         echo.程序所依赖文件 "%%i" 缺失。
-         set nofile=1
-     ) else ( set %%~ni="%%~$PATH:i" )
- )
- :: Dependency check, part 2: tools whose extension is not fixed.
- :: A wildcard such as "concmd.*" in a FOR set is matched against the current
- :: directory only, and a wildcard without a match is silently skipped, so a
- :: missing tool would never be reported. Probe every PATHEXT extension instead.
- for %%b in (htox32c concmd) do (
-     set "%%b="
-     for %%e in (%PATHEXT%) do for %%c in (%%b%%e) do (
-         if not defined %%b if not "%%~$PATH:c" == "" set %%b="%%~$PATH:c"
-     )
-     if not defined %%b (
-         echo.程序所依赖文件 "%%b.*" 缺失。
-         set nofile=1
-     )
- )
- :: Stop here if anything was reported missing.
- if defined nofile goto :EOF
- :: Answer help requests first, so that asking for usage neither creates the
- :: output directories nor changes the current directory.
- if "%~1" == "?" goto help
- if "%~1" == "/?" goto help
- if /i "%~1" == "/h" goto help
- :: Output root, without a trailing backslash because the sub-directory names
- :: below add their own separator. HTML\ receives the downloaded pages and is
- :: the working directory; TXT\ receives the converted text files.
- set savepath=D:\cn-dos
- :: Printable-view URL of a thread; the main loop appends "&tid=<ID>".
- set "url_=http://www.cn-dos.net/forum/viewthread.php?action=printable"
- if not exist "%savepath%\HTML\" md "%savepath%\HTML"
- if not exist "%savepath%\TXT\" md "%savepath%\TXT"
- :: Everything after this point uses relative file names, so do not continue
- :: (and do not delete or move files) when the directory cannot be entered.
- pushd "%savepath%\HTML" || (
-     echo.无法进入保存目录,请检查 savepath 设置。
-     goto :EOF
- )
- :: First thread ID: taken from the command line, otherwise asked for.
- :: Assigning first also clears any inherited "tid" value, so an empty answer
- :: at the prompt leaves tid undefined instead of keeping a stale value.
- set "tid=%~1"
- if not defined tid set /p tid=请输入主题 ID :
- :loop
- :: Main loop: download, convert and file one thread per pass.
- :: Ends when no ID is left or the user answers Q.
- if not defined tid goto :EOF
- if /i "%tid%" == "q" goto :EOF
- :: The ID ends up in file names and in DEL / MOVE wildcards, so accept digits
- :: only. "set tid" prints the value from the environment, which keeps the
- :: unchecked text off the command line while it is being tested.
- set tid 2>nul | findstr /r /x /i /c:"tid=[0-9][0-9]*" >nul
- if errorlevel 1 goto bad_tid
- set url="%url_%&tid=%tid%"
- echo.请稍侯...
- :: Pick the completion message before the download replaces an earlier copy.
- set msg=请求主题 %tid% 已下载!
- if exist "%tid%.html" set msg=请求主题 %tid% 已更新!
- :: Fetch the page, then give the file wget wrote the name <ID>.html.
- :: Either step failing means there is no fresh page to convert.
- wget -q -k %url%
- if errorlevel 1 goto fetch_failed
- move /y "*.php@action=printable&tid=%tid%" %tid%.html >nul
- if errorlevel 1 goto fetch_failed
- :: Re-encode the page, UTF-8 to GBK.
- call concmd /o:gbk %tid%.html >nul
- :: The forum answers with a notice page when the thread does not exist.
- findstr "<br>指定的主题不存在或已被删除" %tid%.html >nul
- if not errorlevel 1 goto missing_tid
- :: HTML to plain text, then run the AWK program embedded in this script over
- :: the text; it writes "<ID>--<title>.txt", after which the raw text is removed.
- htox32c /ip /t0 /o0 /d2 /a1 /u7 %tid%.html 2>nul
- gawk "/^#<-1/,/^#>-1/{if(!/^#/)print}" %self% |gawk --re-interval -f "-" %tid%.txt
- del %tid%.txt
- move /y %tid%*.txt ..\TXT\ >nul
- echo.%msg%
- echo.
- :next_tid
- :: Continue with the next command-line argument, if any.
- shift
- set "tid=%~1"
- goto loop
- :bad_tid
- :: The rejected text is deliberately not echoed back.
- echo.主题 ID 只能由数字组成。
- goto ask_tid
- :missing_tid
- del %tid%.html
- echo.所请求主题不存在!ID:%tid%
- :ask_tid
- :: Clear tid first: SET /P keeps the old value on an empty answer, which
- :: would request the same ID again instead of ending the script.
- set tid=
- set /p tid=请重新输入帖子 ID 或退出[Q]:
- goto loop
- :fetch_failed
- :: Remove whatever wget may have left behind and report the failure instead
- :: of the success message, then carry on with the next ID.
- del "*.php@action=printable&tid=%tid%" >nul 2>nul
- echo.主题 %tid% 下载失败!
- goto next_tid
- :help
- :: Usage text, shown for the ?, /? and /h switches. %~n0 is the script's own
- :: name without extension, so the examples follow a renamed script.
- (
-     echo.DOS联盟论坛收贴小虫
-     echo.
-     echo.用法:%~n0 [ID1] [ID2...]
-     echo. ID 为对应主题 tid ,可指定多个主题。
-     echo.实例:%~n0 1
-     echo. %~n0 24951 22254
- )
- exit /b
- :AwkScript
- #<-1
- # gawk program embedded in the batch file. The main loop extracts the lines
- # between the two marker lines, drops those starting with "#", and runs the
- # rest with --re-interval over <ID>.txt.
- #
- # Phase 1: ignore everything up to the title line. The title, with the
- # print-page link removed, becomes the first line of the output file
- # "<ID>--<title>.txt"; characters not allowed in file names become "-".
- !seen_title {
-     if ($0 ~ /^标题:.*\[打印本页\]$/) {
-         seen_title = 1
-         gsub(/ * \[打印本页\] */, "")
-         title = gensub(/^标题: */, "", "g")
-         tid = gensub(/^([0-9]*).*/, "\\1", "g", FILENAME)
-         name = tid "--" gensub(/[/*<>:?"|\\]/, "-", "g", title) ".txt"
-         print title >name
-     }
-     next
- }
- # Phase 2: a rule of 60 or more dashes is replaced by the line after it.
- # When that line starts an author header, a long rule is written and the
- # header is labelled with its floor number; otherwise a short rule is written.
- /^-{60,}$/ {
-     getline
-     if ($0 ~ /^作者:/) {
-         print "-------------------------------------------------------------------------------" >>name
-         floor++
-         $0 = (floor == 1 ? "『楼 主』: " : "『第 " floor " 楼』: ") $0
-     }
-     else {
-         print "---------------------------------------" >>name
-     }
- }
- # Every line after the title is copied to the output file.
- {
-     print >>name
- }
- END {
-     close(name)
- }
- #>-1
- goto :EOF
无奈何发表于 2006-11-27 01:43 |
|