- :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
- :: gbk2utf8.cmd -V0.1 -- GBK & UTF8 编码互转
- :: 无奈何@cn-dos.net - 2006-11-28 - CMD & GAWK
- :: 用法:gbk2utf8 /I file...
- :: 支持文件: - gawk.exe gbk2utf8.dat
- :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
- @echo off
- setlocal
- rem Bootstrap: record where this script lives, reset working variables,
- rem verify the helper files and refresh the cached awk program.
- set self="%~f0"
- rem The cache file name embeds the byte size of this script, so an edited
- rem script gets a new cache name and the stale one is discarded below.
- set AwkScript="%temp%\%~n0%~z0.awk"
- rem Let the dependency lookup also search the script directory and the current directory.
- set path=%path%;%~dp0;%cd%
- set nofile=
- set error=
- set input=
- rem F and F_input are only assigned by the U and I switches. Clear them here so
- rem values inherited from the calling environment cannot change the direction
- rem of the conversion or the input mode.
- set F=
- set F_input=
- rem Dependency check: each helper must be reachable through PATH. The resolved
- rem full path is stored in a variable named after the file without extension.
- for %%i in (gawk.exe gbk2utf8.dat) do (
-     if "%%~$PATH:i" == "" (
-         echo.程序所依赖文件 "%%i" 缺失。
-         set nofile=1
-     ) else ( set %%~ni="%%~$PATH:i" )
- )
- if defined nofile goto :EOF
- rem Regenerate the awk program when no cache exists for this script size:
- rem remove older caches, then copy the lines between the two marker lines of
- rem this file, leaving out every line that begins with a hash sign.
- if not exist %AwkScript% (
-     del /q "%temp%\%~n0*.awk" 2>nul
-     gawk "/^#<-1/,/^#>-1/{if(!/^#/)print}" %self% >%AwkScript%
- )
- :ParseLoop
- rem Consume one command line argument per pass until none are left.
- if "%~1" == "" goto Start
- if "%~1" == "?" goto SwitchH
- if "%~1" == "/?" goto SwitchH
- rem Switches are matched without regard to case and dispatched to their labels.
- if /i "%~1" == "/U" goto SwitchU
- if /i "%~1" == "/I" goto SwitchI
- if /i "%~1" == "/H" goto SwitchH
- rem Anything else is a file name, which is only legal after the I switch.
- if "%F_input%" == "1" goto ParseFile
- rem In standard input mode the remaining arguments are skipped.
- if "%F_input%" == "-1" shift & goto ParseLoop
- set error=错误: 参数格式不正确 - "%1" !
- goto error
- :ParseFile
- rem Append an existing file to the quoted input list, or stop with a warning.
- if not exist "%~1" set error=警告:文件 "%~1" 不存在。 & goto error
- set input=%input% "%~1"
- shift
- goto ParseLoop
- :SwitchI
- rem The I switch selects the input mode: a following dash means standard
- rem input, coded as -1, anything else means a list of files, coded as 1.
- if "%~2" == "-" (set F_input=-1) else (set F_input=1)
- shift & goto ParseLoop
- :SwitchU
- rem The U switch reverses the conversion direction, UTF8 to GBK.
- set F=-1
- shift & goto ParseLoop
- :error
- rem Print the pending error message, then fall through into the help text.
- echo.%error%
- echo.
- :SwitchH
- rem Usage text. The description of the I switch now names standard input,
- rem which is what the script actually reads; it used to say standard output.
- echo.gbk2utf8 V0.1 -- GBK ^& UTF8 编码互转
- echo.
- echo.用法:1、%~n0 [/U]
- echo. 2、%~n0 [/U] /I file...
- echo. 3、%~n0 [/U] /I -
- echo.
- echo.选项: /? 显示本简短帮助,等价命令 /H 。
- echo. /U UTF8 转码为 GBK ,默认设置为 GBK 转码为 UTF8 。
- echo. /I 指定转换文件,“-”号从标准输入获得。
- echo. 本参数可以空缺,缺省将从标准输入获得。
- echo. 指定转换文件时,/I 参数不可省略。
- goto :EOF
- :Start
- rem Run the conversion. With no files collected, fall back to standard input.
- rem The emptiness test uses "if not defined" because the input list holds
- rem quoted names; comparing it as a quoted string breaks the IF syntax as
- rem soon as one of the names contains a space.
- if not defined input set F_input=-1
- if "%F_input%" == "-1" (
-     gawk -v F=%F% -f %AwkScript%
- ) else (
-     gawk -v F=%F% -f %AwkScript% %input%
- )
- goto :EOF
- :AwkScript
- #<-1
- # gbk2utf8(string, flag): translate one line through the charset[] table.
- #   string - text to convert
- #   flag   - -1 scans for UTF-8 sequences, any other value scans for GBK sequences
- # Returns the converted text. Single byte matches are copied unchanged, longer
- # matches are replaced by their charset[] entry (empty when there is none), and
- # bytes that the pattern cannot match are skipped.
- function gbk2utf8(string, flag,    pattern, piece, out) {
-     if (flag == -1)
-         pattern = "[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]"
-     else
-         pattern = "[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]"
-     out = ""
-     # Peel one character off the front per pass until nothing matches any more.
-     while (match(string, pattern)) {
-         piece = substr(string, RSTART, RLENGTH)
-         out = out ((RLENGTH > 1) ? charset[piece] : piece)
-         string = substr(string, RSTART + RLENGTH)
-     }
-     return out
- }
- # load_charset(file, dir): read comma separated pairs from file into charset[].
- #   dir == 1 maps field 1 to field 2, any other value maps field 2 to field 1.
- # Returns the number of records read, 0 when the file is missing or empty.
- function load_charset(file, dir,    count) {
-     while ((getline < file) > 0) {
-         if (dir == 1)
-             charset[$1] = $2
-         else
-             charset[$2] = $1
-         count++
-     }
-     close(file)
-     return count + 0
- }
- BEGIN {
-     FS = ","
-     if (!F) F = 1
-     # The copy in the current directory is still tried first. When it yields
-     # nothing, fall back to the full path the wrapper script found on PATH and
-     # stored in the environment variable named gbk2utf8, so the tool also works
-     # when started from another directory.
-     if (load_charset("gbk2utf8.dat", F) == 0) {
-         for (envname in ENVIRON) {
-             # Compare in lower case: the wrapper derives the variable name from a file name.
-             if (tolower(envname) != "gbk2utf8")
-                 continue
-             datpath = ENVIRON[envname]
-             # The wrapper stores the path wrapped in quotes with a trailing blank.
-             gsub(/^[ \t"]+|[ \t"]+$/, "", datpath)
-             if (datpath != "")
-                 load_charset(datpath, F)
-         }
-     }
- }
- {
-     # Convert every input line in the direction selected by F and print it.
-     print gbk2utf8($0, F)
- }
- #>-1
- goto :EOF
无奈何发表于 2006-11-30 01:02 |
|