高分求一个c语言的词法分析器(运行环境最好是wintc),要求如下~

识别简单语言的单词符号
识别简单语言的基本字、标识符、无符号整数、运算符和界符。
begin 1
if 2
then 3
while 4
do 5
end 6
l ( l | d ) * 10 (即标识符)
dd * 11 (即无符号整数)
用科学计数法表示的数 12
+ 13
- 14
* 15
/ 16
: 17
:= 18
< 20
<> 21
<= 22
> 23
>= 24
= 25
; 26
( 27
) 28
# 0

例如输入: begin x:=9; if x>0 then x:=2*x+1/3; end #
输出: (1 begin )
(10 x )
(18 := )
(11 9)
(26 ; )
(2 if )
...
输入:1.23e6
输出:(12 1.23e6)
明天中午以前希望就能有解答 满足条件追加奖励

第1个回答  2008-10-23
/*******************************************
词法分析程序
2007.12.5
********************************************/
#include<stdlib.h>
#include<stdio.h>
#include<string.h>

/*
 * Program entry point.
 *
 * Creates the table files, runs the scanner and, when the scanner reports
 * success (return value 1), tells the user where the token listing is.
 *
 * init() and scanner() are defined further down in this file, so they are
 * declared here; without the declarations the call sites would implicitly
 * declare them as returning int, which conflicts with the later
 * "void init()" definition.
 */
void init(void);
int scanner(void);

int main(void)
{
    int status;

    init();                 /* create key/limit/id/constant tables and an empty output file */
    status = scanner();     /* scan the source file chosen by the user */
    if (status == 1)
        printf("The answer is in 'output.txt':");
    /* NOTE(review): getch() is not declared by any header this file includes;
       presumably the <conio.h> routine of DOS/Windows compilers - confirm. */
    getch();
    return 0;
}
/*
 * Write `count` strings, one per line, to a freshly truncated file.
 * With count == 0 the file is simply created empty.
 * Failures are reported on stderr; the caller carries on with the other files.
 */
static void init_write_table(const char *path, const char *const *entries, size_t count)
{
    FILE *fp;
    size_t i;

    fp = fopen(path, "w");
    if (fp == NULL) {
        fprintf(stderr, "cannot create %s\n", path);
        return;
    }
    for (i = 0; i < count; i++)
        fprintf(fp, "%s\n", entries[i]);
    if (fclose(fp) != 0)
        fprintf(stderr, "cannot write %s\n", path);
}

/*
 * init - (re)create every file the scanner works with.
 *
 *   key.txt       the 32 C keywords, one per line
 *   limit.txt     38 operators/delimiters followed by the double quote (39 lines)
 *   id.txt        identifier table, created empty
 *   constant.txt  constant table, created empty
 *   output.txt    token listing, created empty
 *
 * A string's 1-based line number in key.txt / limit.txt is the index that
 * find() returns for it, so the order of the entries below is significant.
 */
void init(void)
{
    static const char *const key[] = {
        "auto", "break", "case", "char", "const", "continue", "default", "do", "double",
        "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register",
        "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef",
        "union", "unsigned", "void", "volatile", "while"
    };
    static const char *const limit[] = {
        "(", ")", "[", "]", "->", ".", "!", "++", "--", "&", "~",
        "*", "/", "%", "+", "-", "<<", ">>", "<", "<=", ">", ">=", "==", "!=", "&&", "||",
        "=", "+=", "-=", "*=", "/=", ",", ";", "{", "}", "#", "_", "'",
        "\""   /* the original appended the double quote as the last line */
    };

    init_write_table("key.txt", key, sizeof key / sizeof key[0]);
    init_write_table("limit.txt", limit, sizeof limit / sizeof limit[0]);
    init_write_table("id.txt", NULL, 0);
    init_write_table("constant.txt", NULL, 0);
    init_write_table("output.txt", NULL, 0);
}
/*
 * dtb - convert a string of decimal digits to its binary representation.
 *
 * buf     NUL-terminated string; every character is treated as a decimal digit.
 * return  newly malloc'ed, NUL-terminated string of '0'/'1' characters with no
 *         leading zeros ("0" for the value zero), or NULL when the allocation
 *         fails.  The caller owns the result and must free() it.
 *
 * The value is accumulated in an unsigned long, so an over-long digit string
 * wraps around instead of invoking signed-overflow undefined behaviour, and
 * the output is sized from the number of bits actually needed rather than a
 * fixed 20-entry scratch array.
 */
char * dtb(char *buf)
{
    unsigned long value = 0;
    unsigned long rest;
    size_t digits = 0;
    size_t i;
    char *binary;

    for (i = 0; buf[i] != '\0'; i++)
        value = value * 10 + (unsigned long)(buf[i] - '0');

    /* Count the binary digits first; zero still needs one digit. */
    rest = value;
    do {
        digits++;
        rest /= 2;
    } while (rest != 0);

    binary = malloc(digits + 1);
    if (binary == NULL)
        return NULL;

    /* Fill from the least significant bit backwards. */
    binary[digits] = '\0';
    for (i = digits; i > 0; i--) {
        binary[i - 1] = (char)('0' + (int)(value % 2));
        value /= 2;
    }
    return binary;
}

/*
 * Map a table type to the file that stores it, or NULL for an unknown type.
 */
static const char *find_table_path(int type)
{
    switch (type) {
    case 1:  return "key.txt";
    case 2:  return "id.txt";
    case 3:  return "constant.txt";
    case 4:  return "limit.txt";
    default: return NULL;
    }
}

/*
 * find - look a string up in one of the table files, optionally adding it.
 *
 * buf      string to look for (compared against whole lines of the table).
 * type     1 = key.txt, 2 = id.txt, 3 = constant.txt, 4 = limit.txt.
 * command  1 = look up only; any other value = append buf when it is missing.
 *
 * return   the 1-based line number of the matching entry; 0 when the entry is
 *          missing and command == 1; otherwise the line number of the newly
 *          appended entry.  0 is also returned for an unknown type, a NULL
 *          buf, or when the table cannot be opened for appending.
 *
 * Characters are read into an int so EOF is detected reliably, lines are
 * compared as they are read (no fixed-size line buffer), a final line without
 * a trailing newline is handled, and a table file that cannot be opened for
 * reading is treated as empty.
 */
int find(char *buf,int type,int command)
{
    const char *path;
    FILE *fp;
    int number = 0;
    int ch;

    path = find_table_path(type);
    if (path == NULL || buf == NULL)
        return 0;

    fp = fopen(path, "r");
    if (fp != NULL) {
        ch = fgetc(fp);
        while (ch != EOF) {
            const char *cursor = buf;
            int same = 1;

            /* Compare the current line against buf one character at a time. */
            while (ch != '\n' && ch != EOF) {
                if (same && *cursor != '\0' && *cursor == (char)ch)
                    cursor++;
                else
                    same = 0;
                ch = fgetc(fp);
            }
            number++;
            if (same && *cursor == '\0') {
                fclose(fp);
                return number;          /* found: index within the table */
            }
            if (ch == '\n')
                ch = fgetc(fp);
        }
        fclose(fp);
    }

    if (command == 1)
        return 0;                       /* look-up only: report "not found" */

    fp = fopen(path, "a");
    if (fp == NULL)
        return 0;
    fprintf(fp, "%s\n", buf);
    fclose(fp);
    return number + 1;                  /* index of the entry just appended */
}

/*
 * cs_manage - handle a run of digits read by the scanner.
 *
 * buffer  the digit string exactly as it appeared in the source.
 *
 * The number is converted to binary with dtb(), looked up in (and if missing
 * added to) the constant table via find(..., 3, 2), and a record
 * "<digits> 3 <table index>" is appended to output.txt.
 * The string returned by dtb() is released here once it has been used.
 */
void cs_manage(char *buffer)
{
    FILE *fp;
    char *binary;
    int result;

    binary = dtb(buffer);
    if (binary == NULL) {
        fprintf(stderr, "out of memory while converting %s\n", buffer);
        return;
    }
    result = find(binary, 3, 2);    /* look up; append when not present */
    free(binary);

    fp = fopen("output.txt", "a");
    if (fp == NULL) {
        fprintf(stderr, "cannot open output.txt\n");
        return;
    }
    fprintf(fp, "%s\t\t\t3\t\t\t%d\n", buffer, result);
    fclose(fp);
}

/*
 * ch_manage - handle a word (letters, digits, underscores) read by the scanner.
 *
 * buffer  the word exactly as it appeared in the source.
 *
 * The word is first looked up in the keyword table (find(..., 1, 1)).  When it
 * is a keyword the record written to output.txt has category 1; otherwise the
 * word is looked up in, and if missing added to, the identifier table
 * (find(..., 2, 2)) and the record has category 2.  In both cases the last
 * field is the index returned by find().
 */
void ch_manage(char *buffer)
{
    FILE *fp;
    int category = 1;
    int result;

    result = find(buffer, 1, 1);
    if (result == 0) {
        /* not a keyword: treat it as an identifier */
        category = 2;
        result = find(buffer, 2, 2);
    }

    fp = fopen("output.txt", "a");
    if (fp == NULL) {
        fprintf(stderr, "cannot open output.txt\n");
        return;
    }
    fprintf(fp, "%s\t\t\t%d\t\t\t%d\n", buffer, category, result);
    fclose(fp);
}

/*
 * er_manage - report an offending character.
 *
 * error   the character that could not be recognised.
 * lineno  the source line it was found on.
 *
 * Writes "error: <char> ,line <n>" on a new line of standard output.
 */
void er_manage(char error,int lineno)
{
    fprintf(stdout, "\nerror: %c ,line %d", error, lineno);
}
/* Longest lexeme kept for a word or number; extra characters are dropped. */
enum { SCANNER_MAX_LEXEME = 29 };

/* Non-zero when ch is a decimal digit. */
static int scanner_is_digit(int ch)
{
    return ch >= '0' && ch <= '9';
}

/* Non-zero when ch may start a word: a letter or an underscore. */
static int scanner_is_word_start(int ch)
{
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_';
}

/* Non-zero when ch may continue a word: letter, underscore or digit. */
static int scanner_is_word_part(int ch)
{
    return scanner_is_word_start(ch) || scanner_is_digit(ch);
}

/*
 * Append one operator/delimiter record (category 4) to output.txt.
 * `tail` is the text written after the index; it lets callers keep the exact
 * line endings the listing has always used.
 */
static void scanner_emit(const char *lexeme, int index, const char *tail)
{
    FILE *fpout;

    fpout = fopen("output.txt", "a");
    if (fpout == NULL) {
        fprintf(stderr, "cannot open output.txt\n");
        return;
    }
    fprintf(fpout, "%s\t\t\t4\t\t\t%d%s", lexeme, index, tail);
    fclose(fpout);
}

/*
 * Collect the run of characters accepted by `accepts`, starting with `ch`,
 * into `lexeme` (room for SCANNER_MAX_LEXEME characters plus the NUL).
 * Returns the first character after the run (possibly EOF).
 */
static int scanner_read_run(FILE *fpin, int ch, int (*accepts)(int), char *lexeme, int line)
{
    int length = 0;
    int dropped = 0;

    while (ch != EOF && accepts(ch)) {
        if (length < SCANNER_MAX_LEXEME)
            lexeme[length++] = (char)ch;
        else
            dropped = 1;
        ch = fgetc(fpin);
    }
    lexeme[length] = '\0';
    if (dropped)
        printf("\nwarning: token truncated to %d characters ,line %d",
               SCANNER_MAX_LEXEME, line);
    return ch;
}

/*
 * scanner - tokenise a source file chosen by the user.
 *
 * Prompts for a file name, then reads the file character by character:
 *   - words go to ch_manage(), digit runs to cs_manage();
 *   - blanks and tabs are skipped, newlines advance the line counter;
 *   - "/=" and "/" are written as operators, a slash-star comment is skipped;
 *   - a double-quoted string is skipped, with a record written for the opening
 *     and for the closing quote;
 *   - anything else is looked up in the operator/delimiter table, first as a
 *     two-character and then as a one-character symbol; unknown characters
 *     are reported through er_manage().
 *
 * return  0 when the file cannot be opened, 1 after a completed scan.
 *
 * Differences from the earlier version of this function:
 *   - characters are held in an int, so EOF is recognised reliably;
 *   - the one-character lookahead is carried in `ch` instead of seeking the
 *     stream backwards, which also ends the endless loop on a trailing '/';
 *   - lexemes live in a bounded stack buffer (no overflow, nothing leaked);
 *   - end of file inside a comment or string is reported as an error instead
 *     of looping forever, and a comment ending in "**" + "/" is closed properly;
 *   - a backslash inside a string skips the following character;
 *   - newlines inside comments and strings are counted;
 *   - the indices of "/", "/=" and the double quote come from find() rather
 *     than from hard-coded numbers;
 *   - every reported error is counted, including one on the last character.
 */
int scanner()
{
    FILE *fpin;
    char filename[20];
    char lexeme[SCANNER_MAX_LEXEME + 1];
    char pair[3];
    int ch, next, previous;
    int line = 1, errorno = 0;
    int result, start_line;

    printf("\nInput the file name:");
    if (scanf("%19s", filename) != 1)     /* width keeps filename[20] in bounds */
        filename[0] = '\0';
    fpin = fopen(filename, "r");
    if (fpin == NULL) {
        printf("the file you input is not exist!");
        getch();
        return 0;
    }

    /* Invariant: at the top of the loop `ch` is the next unprocessed character. */
    ch = fgetc(fpin);
    while (ch != EOF) {
        if (scanner_is_word_start(ch)) {
            ch = scanner_read_run(fpin, ch, scanner_is_word_part, lexeme, line);
            ch_manage(lexeme);
        } else if (scanner_is_digit(ch)) {
            ch = scanner_read_run(fpin, ch, scanner_is_digit, lexeme, line);
            cs_manage(lexeme);
        } else if (ch == ' ' || ch == '\t') {
            ch = fgetc(fpin);
        } else if (ch == '\n') {
            line++;
            ch = fgetc(fpin);
        } else if (ch == '/') {
            next = fgetc(fpin);
            if (next == '=') {
                scanner_emit("/=", find("/=", 4, 1), "\n");
                ch = fgetc(fpin);
            } else if (next == '*') {
                /* Skip to the closing star-slash pair. */
                start_line = line;
                previous = 0;
                ch = fgetc(fpin);
                while (ch != EOF && !(previous == '*' && ch == '/')) {
                    if (ch == '\n')
                        line++;
                    previous = ch;
                    ch = fgetc(fpin);
                }
                if (ch == EOF) {
                    er_manage('/', start_line);   /* comment never closed */
                    errorno++;
                } else {
                    ch = fgetc(fpin);
                }
            } else {
                scanner_emit("/", find("/", 4, 1), "\n");
                ch = next;                        /* lookahead not consumed */
            }
        } else if (ch == '"') {
            result = find("\"", 4, 1);
            scanner_emit("\"", result, "\n");
            start_line = line;
            ch = fgetc(fpin);
            while (ch != EOF && ch != '"') {
                if (ch == '\\')
                    ch = fgetc(fpin);             /* escaped character */
                if (ch == '\n')
                    line++;
                if (ch != EOF)
                    ch = fgetc(fpin);
            }
            if (ch == EOF) {
                er_manage('"', start_line);       /* string never closed */
                errorno++;
            } else {
                scanner_emit("\"", result, "\n");
                ch = fgetc(fpin);
            }
        } else {
            /* Operator, delimiter or illegal character. */
            pair[0] = (char)ch;
            next = fgetc(fpin);
            result = 0;
            if (next != EOF) {
                pair[1] = (char)next;
                pair[2] = '\0';
                result = find(pair, 4, 1);        /* two-character symbol? */
            }
            if (result != 0) {
                scanner_emit(pair, result, "\n");
                ch = fgetc(fpin);
            } else {
                pair[1] = '\0';
                result = find(pair, 4, 1);        /* one-character symbol? */
                if (result != 0) {
                    /* the extra tab before the newline is kept from the
                       original listing format of this branch */
                    scanner_emit(pair, result, next != EOF ? "\t\n" : "\n");
                } else {
                    er_manage(pair[0], line);
                    errorno++;
                }
                ch = next;                        /* lookahead not consumed */
            }
        }
    }
    fclose(fpin);
    printf("\nThere are %d error(s).\n", errorno);
    return 1;
}

/*需要处理的文件名字*/
source.txt
#include<stdio.h>
main( )
{
int i;
int k;
int sum;
k=50;
i=0;
sum=0;
while( i<=k )
{
sum=sum+i;
i=i+1;
k=k+(sum+4)/i-1;
if( k>2 )
{
i=i+k;
}
}
if( sum>=25 )
{
i=1;
}else
{
i=0;
}
}
上面是我在学编译原理的时候编的一个词法分析程序,你看看吧,需要处理的程序在source.txt文件中,有不明白的发邮件问我:[email protected]
(本回答被提问者采纳)
第2个回答  2008-10-23
用boost中的regex
第3个回答  2008-10-24
这是我最近编的一个词法分析器,方法比较简单,输出和你的有些差别,可以参考一下

#include <iostream>
#include <string>
using namespace std;

// Checks whether s[i..j] is exactly the keyword "int".
// Prints a notice and returns true on a match, otherwise returns false.
bool jiancha_int(int i,int j,char s[100])
{
    if (j - i != 2)
        return false;
    if (s[i] != 'i' || s[i + 1] != 'n' || s[i + 2] != 't')
        return false;
    cout<<"int是一个关键字"<<endl;
    return true;
}

// Checks whether s[i..j] is exactly the keyword "if".
// Prints a notice and returns true on a match, otherwise returns false.
bool jiancha_if(int i,int j,char s[100])
{
    if (j - i != 1)
        return false;
    if (s[i] != 'i' || s[i + 1] != 'f')
        return false;
    cout<<"if是一个关键字"<<endl;
    return true;
}

// Checks whether s[i..j] is exactly the keyword "else".
// Prints a notice and returns true on a match, otherwise returns false.
bool jiancha_else(int i,int j,char s[100])
{
    if (j - i != 3)
        return false;
    if (s[i] != 'e' || s[i + 1] != 'l' || s[i + 2] != 's' || s[i + 3] != 'e')
        return false;
    cout<<"else是一个关键字"<<endl;
    return true;
}

// Checks whether s[i..j] is exactly the keyword "void".
// Prints a notice and returns true on a match, otherwise returns false.
bool jiancha_void(int i,int j,char s[100])
{
    if (j - i != 3)
        return false;
    if (s[i] != 'v' || s[i + 1] != 'o' || s[i + 2] != 'i' || s[i + 3] != 'd')
        return false;
    cout<<"void是一个关键字"<<endl;
    return true;
}

// Checks whether s[i..j] is exactly the keyword "while".
// Prints a notice and returns true on a match, otherwise returns false.
bool jiancha_while(int i,int j,char s[100])
{
    if (j - i != 4)
        return false;
    if (s[i] != 'w' || s[i + 1] != 'h' || s[i + 2] != 'i' ||
        s[i + 3] != 'l' || s[i + 4] != 'e')
        return false;
    cout<<"while是一个关键字"<<endl;
    return true;
}

// Character classes used by Identifier():
//   numb   - the ten decimal digits
//   letter - characters that may start an identifier (letters and '_')
//   digit  - characters that may continue an identifier (letters, '_', digits)
char numb[10]={'0','1','2','3','4','5','6','7','8','9'};
char letter[53]={'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','_','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'};
char digit[63]={'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','_','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','0','1','2','3','4','5','6','7','8','9'};

// True when ch occurs among the first `count` entries of `table`.
static bool identifier_in_table(const char *table, int count, char ch)
{
    for (int k = 0; k < count; k++)
    {
        if (table[k] == ch)
            return true;
    }
    return false;
}

// Classifies the text s[x..y] and prints it followed by a verdict:
//   - starts with a letter or '_' and continues with identifier characters only:
//     an identifier;
//   - does not start with a letter or '_' and consists of digits only: a constant;
//   - anything else: an illegal identifier.
// Always returns true.
bool Identifier(int x,int y,char s[100])
{
    const bool starts_like_name = identifier_in_table(letter, 53, s[x]);
    bool every_char_fits = true;
    int pos;

    if (starts_like_name)
    {
        // the first character is already accepted; check the remaining ones
        for (pos = x + 1; pos <= y && every_char_fits; pos++)
            every_char_fits = identifier_in_table(digit, 63, s[pos]);
    }
    else
    {
        for (pos = x; pos <= y && every_char_fits; pos++)
            every_char_fits = identifier_in_table(numb, 10, s[pos]);
    }

    for (pos = x; pos <= y; pos++)
        cout<<s[pos];

    if (!every_char_fits)
        cout<<"是一个非法标识符"<<endl;
    else if (starts_like_name)
        cout<<"是一个标识符"<<endl;
    else
        cout<<"是一个常数"<<endl;
    return true;
}

// Runs the keyword and identifier checks on the pending text[first..last].
// Candidate start positions are tried from `first` upwards and the search
// stops at the first check that succeeds; nothing happens when first > last.
static void report_pending_token(int first, int last, char text[100])
{
    for (int j = first; j <= last; j++)
    {
        if (jiancha_int(j, last, text) || jiancha_if(j, last, text) ||
            jiancha_else(j, last, text) || jiancha_void(j, last, text) ||
            jiancha_while(j, last, text) || Identifier(j, last, text))
        {
            break;
        }
    }
}

// Reads one line from standard input and reports its tokens.
//
// The line is split at blanks, tabs, the operators + - * / and the delimiters
// = ( ) ; , and #.  Whenever one of these is met, the text collected since the
// previous separator is classified first, then the separator itself is
// reported.  '#' ends the analysis: the text after it is not scanned.
// Text at the end of the line that is not followed by a separator is not
// classified.  Finally the whole input line is echoed.
//
// main now returns int as the C++ standard requires, the twelve identical
// branches share one helper, and scanning really stops at '#'.
int main()
{
    char sentence[100];
    cout<<"请输入测试语句:"<<endl;
    cin.getline(sentence,100,'\n');
    string str(sentence);
    cout<<"---------------------------------"<<endl;

    int t = -1;                                   // index of the previous separator
    const int length = static_cast<int>(str.length());
    for (int i = 0; i < length; i++)
    {
        const char *notice = 0;
        switch (str[i])
        {
        case ' ':  notice = "------遇到空格"; break;
        case '\t': notice = "------遇到Tab";  break;
        case '+':  notice = "+为运算符";       break;
        case '-':  notice = "-为运算符";       break;
        case '*':  notice = "*为运算符";       break;
        case '/':  notice = "/为运算符";       break;
        case '=':  notice = "=为分界符";       break;
        case '(':  notice = "(为分界符";       break;
        case ')':  notice = ")为分界符";       break;
        case ';':  notice = ";为分界符";       break;
        case ',':  notice = ",为分界符";       break;
        case '#':  notice = "------停止";      break;
        default:   break;                         // part of a token: keep collecting
        }
        if (notice == 0)
            continue;

        report_pending_token(t + 1, i - 1, sentence);
        cout<<notice<<endl;
        if (str[i] == '#')
            break;                                // end marker: stop scanning
        t = i;
    }
    cout<<"---------------------------------"<<endl;
    cout<<str<<endl;
    return 0;
}