这是笔者《计算系统概论》课程的实验之一,内容是实现一个简易的 LC-3 汇编器,将汇编代码转换为机器码。原理较为简单:对源文件进行两次遍历,第一次遍历记录各标签的位置,第二次遍历完成从汇编指令到机器码的转换。但具体用代码实现起来相当繁琐,尤其是需要逐一处理的指令不止 18 条,所以把写好的代码放在下面供大家参考,注释也已写好;如果有看不懂的地方或其他问题,可以直接问我。
/*
 * Simple two-pass LC-3 assembler.
 *
 * Pass 1 (get_lable_pos) records the word position of every label.
 * Pass 2 (translate_instruction) turns each source line into 16-bit binary
 * words written as text, one word per output line.  The first output word is
 * the .ORIG address.
 *
 * Positions are counted in words, starting at 1 for the .ORIG line, so the
 * first real instruction has position 2 and the memory address of position p
 * is origin + p - 2.
 */
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LINE_LENGTH 100
#define MAX_LINES 100    /* capacity of the line and output tables in main */
#define WORD_BITS 16     /* width of one LC-3 machine word */
#define MAX_OPERANDS 3   /* no LC-3 instruction takes more operands */

/* Singly linked label table; the first node is a dummy head with an empty name. */
typedef struct Lable {
    char *name;          /* heap-allocated label name */
    int pos;             /* 1-based word position (see file header) */
    struct Lable *next;
} Lable;

/* Function prototypes */
void read_asm_file(const char *filename, char lines[][MAX_LINE_LENGTH], int *num_lines);
void write_output_file(const char *filename, const char *output[], int num_lines);
void assemble(char lines[][MAX_LINE_LENGTH], int num_lines, char *output[]);
char *translate_instruction(char *instruction, int line, Lable *head);
char *bin_transfer(char *str, int bit);
char *reg_num(char *str);
Lable *get_lable_pos(char lines[][MAX_LINE_LENGTH], int line_index);
char *word_ascii(char word);
char **my_strtok(const char *source, char flag);
int getIndexOfSigns(char ch);
long hexToDec(char *source);
void reverse(char *left, char *right);
char *to_binary(int n, int m);

/*
 * Offset that, added to a 0-based source line index, gives the 1-based word
 * position of that line.  translate_instruction() adjusts it after every line
 * by (words emitted - 1), so multi-word directives push later lines forward
 * and lines that emit nothing pull them back.
 */
int trans_line = 1;

/* Address given by the most recently translated .ORIG, or -1 if none yet. */
static long origin_address = -1;

static const char ZERO_WORD[] = "0000000000000000";

/* How the operands of a mnemonic are laid out. */
typedef enum {
    OP_ORIG,
    OP_FILL,
    OP_BLKW,
    OP_STRINGZ,
    OP_END,
    OP_ARITH,     /* ADD, AND: DR, SR1, SR2 | imm5 */
    OP_BRANCH,    /* BR*: PCoffset9 */
    OP_BASE_REG,  /* JMP, JSRR: BaseR */
    OP_JSR,       /* JSR: PCoffset11 */
    OP_PC_REL,    /* LD, LDI, ST, STI, LEA: reg, PCoffset9 */
    OP_BASE_OFF,  /* LDR, STR: reg, BaseR, offset6 */
    OP_NOT,       /* NOT: DR, SR */
    OP_TRAP,      /* TRAP: trapvect8 */
    OP_FIXED      /* complete 16-bit encoding, no operands */
} OpKind;

typedef struct {
    const char *name;
    OpKind kind;
    const char *bits;    /* leading bits of the encoding (whole word for OP_FIXED) */
} OpInfo;

static const OpInfo op_table[] = {
    {".ORIG",    OP_ORIG,     ""},
    {".FILL",    OP_FILL,     ""},
    {".BLKW",    OP_BLKW,     ""},
    {".STRINGZ", OP_STRINGZ,  ""},
    {".END",     OP_END,      ""},
    {"ADD",      OP_ARITH,    "0001"},
    {"AND",      OP_ARITH,    "0101"},
    {"BR",       OP_BRANCH,   "0000111"},
    {"BRN",      OP_BRANCH,   "0000100"},
    {"BRZ",      OP_BRANCH,   "0000010"},
    {"BRP",      OP_BRANCH,   "0000001"},
    {"BRNZ",     OP_BRANCH,   "0000110"},
    {"BRNP",     OP_BRANCH,   "0000101"},
    {"BRZP",     OP_BRANCH,   "0000011"},
    {"BRNZP",    OP_BRANCH,   "0000111"},
    {"JMP",      OP_BASE_REG, "1100000"},
    {"JSRR",     OP_BASE_REG, "0100000"},
    {"JSR",      OP_JSR,      "01001"},
    {"LD",       OP_PC_REL,   "0010"},
    {"LDI",      OP_PC_REL,   "1010"},
    {"ST",       OP_PC_REL,   "0011"},
    {"STI",      OP_PC_REL,   "1011"},
    {"LEA",      OP_PC_REL,   "1110"},
    {"LDR",      OP_BASE_OFF, "0110"},
    {"STR",      OP_BASE_OFF, "0111"},
    {"NOT",      OP_NOT,      "1001"},
    {"TRAP",     OP_TRAP,     "11110000"},
    {"RET",      OP_FIXED,    "1100000111000000"},
    {"RTI",      OP_FIXED,    "1000000000000000"},
    {"GETC",     OP_FIXED,    "1111000000100000"},
    {"OUT",      OP_FIXED,    "1111000000100001"},
    {"PUTS",     OP_FIXED,    "1111000000100010"},
    {"IN",       OP_FIXED,    "1111000000100011"},
    {"PUTSP",    OP_FIXED,    "1111000000100100"},
    {"HALT",     OP_FIXED,    "1111000000100101"},
};

/* One source line split into its parts; all pointers point into buf. */
typedef struct {
    char *buf;                     /* private heap copy of the line */
    char *label;                   /* NULL when the line has no label */
    char *op_text;                 /* mnemonic as written, NULL when absent */
    const OpInfo *op;              /* NULL when absent or unknown */
    char *operand[MAX_OPERANDS];
    int operand_count;             /* real count, may exceed MAX_OPERANDS */
    char *string;                  /* decoded .STRINGZ text, NULL if malformed */
    size_t string_len;
} ParsedLine;

/* Buffer for one diagnostic message. */
typedef struct {
    char text[160];
} ErrorText;

/* malloc that terminates the program on failure, so callers need no checks. */
static void *xmalloc(size_t size)
{
    void *p = malloc(size != 0 ? size : 1);
    if (p == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    return p;
}

/* Heap copy of a string (strdup is not part of ISO C11). */
static char *copy_string(const char *s)
{
    size_t size = strlen(s) + 1;
    char *copy = xmalloc(size);
    memcpy(copy, s, size);
    return copy;
}

/* Case-insensitive string equality. */
static int equal_ignore_case(const char *a, const char *b)
{
    while (*a != '\0' && *b != '\0') {
        if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) {
            return 0;
        }
        a++;
        b++;
    }
    return *a == *b;
}

/* Look a mnemonic or directive up in op_table; NULL when it is not one. */
static const OpInfo *find_op(const char *name)
{
    for (size_t i = 0; i < sizeof op_table / sizeof op_table[0]; i++) {
        if (equal_ignore_case(name, op_table[i].name)) {
            return &op_table[i];
        }
    }
    return NULL;
}

/* Find a label by exact (case-sensitive) name; NULL when undefined. */
static const Lable *find_label(const Lable *head, const char *name)
{
    for (const Lable *node = head; node != NULL; node = node->next) {
        if (node->name != NULL && strcmp(node->name, name) == 0) {
            return node;
        }
    }
    return NULL;
}

/* Release a label list including its dummy head. */
static void free_labels(Lable *head)
{
    while (head != NULL) {
        Lable *next = head->next;
        free(head->name);
        free(head);
        head = next;
    }
}

/*
 * Parse "#123", "x1F"/"X1F" or a plain decimal, each with an optional sign
 * after the prefix.  Returns 1 on success; *is_hex tells which form was used.
 */
static int parse_number(const char *tok, long *value, int *is_hex)
{
    if (tok == NULL) {
        return 0;
    }
    int base = 10;
    if (*tok == '#') {
        tok++;
    } else if (*tok == 'x' || *tok == 'X') {
        base = 16;
        tok++;
    }
    const char *digits = (*tok == '-' || *tok == '+') ? tok + 1 : tok;
    if (*digits == '\0') {
        return 0;
    }
    /* Validate by hand: strtol alone would accept blanks and a "0x" prefix. */
    for (const char *p = digits; *p != '\0'; p++) {
        int ok = (base == 16) ? isxdigit((unsigned char)*p)
                              : isdigit((unsigned char)*p);
        if (!ok) {
            return 0;
        }
    }
    errno = 0;
    long parsed = strtol(tok, NULL, base);
    if (errno == ERANGE) {
        return 0;
    }
    *value = parsed;
    *is_hex = (base == 16);
    return 1;
}

/*
 * Range check for a width-bit field.  Decimal literals and label offsets must
 * fit as signed values; hex literals may also be given as a raw bit pattern.
 */
static int fits_field(long value, int width, int is_hex)
{
    long half = 1L << (width - 1);
    long upper = is_hex ? 2 * half - 1 : half - 1;
    return value >= -half && value <= upper;
}

/* Cut the line at its end-of-line or at a ';' that is outside a string. */
static void strip_comment(char *s)
{
    int quoted = 0;
    for (; *s != '\0'; s++) {
        if (*s == '\n' || *s == '\r') {
            break;
        }
        if (quoted) {
            if (*s == '\\' && s[1] != '\0') {
                s++;                /* skip the escaped character */
            } else if (*s == '"') {
                quoted = 0;
            }
        } else if (*s == '"') {
            quoted = 1;
        } else if (*s == ';') {
            break;
        }
    }
    *s = '\0';
}

static int is_separator(char c)
{
    return c == ',' || isspace((unsigned char)c);
}

/* Return the next token (terminated in place) and advance the cursor. */
static char *next_token(char **cursor)
{
    char *p = *cursor;
    while (*p != '\0' && is_separator(*p)) {
        p++;
    }
    if (*p == '\0') {
        *cursor = p;
        return NULL;
    }
    char *start = p;
    while (*p != '\0' && !is_separator(*p)) {
        p++;
    }
    if (*p != '\0') {
        *p++ = '\0';
    }
    *cursor = p;
    return start;
}

/*
 * Decode the double-quoted operand of .STRINGZ in place.  Recognised escapes:
 * \n \t \r \0 \\ \" ; any other backslash is kept literally.  pl->string stays
 * NULL when the literal is missing, unterminated or followed by other text.
 */
static void parse_string_literal(char *p, ParsedLine *pl)
{
    while (isspace((unsigned char)*p)) {
        p++;
    }
    if (*p != '"') {
        return;
    }
    char *src = p + 1;
    char *dst = p;                  /* the decoded text is never longer */
    for (;;) {
        char c = *src++;
        if (c == '\0') {
            return;                 /* closing quote is missing */
        }
        if (c == '"') {
            break;
        }
        if (c == '\\') {
            switch (*src) {
            case 'n':  c = '\n'; src++; break;
            case 't':  c = '\t'; src++; break;
            case 'r':  c = '\r'; src++; break;
            case '0':  c = '\0'; src++; break;
            case '\\': c = '\\'; src++; break;
            case '"':  c = '"';  src++; break;
            default:   break;
            }
        }
        *dst++ = c;
    }
    while (isspace((unsigned char)*src)) {
        src++;
    }
    if (*src != '\0') {
        return;
    }
    *dst = '\0';
    pl->string = p;
    pl->string_len = (size_t)(dst - p);
}

/*
 * Split a source line into label, mnemonic and operands.  A first token that
 * is not a known mnemonic is taken as a label.  The caller frees pl->buf.
 */
static void parse_line(const char *text, ParsedLine *pl)
{
    *pl = (ParsedLine){0};
    pl->buf = copy_string(text);
    strip_comment(pl->buf);

    char *cursor = pl->buf;
    char *tok = next_token(&cursor);
    if (tok == NULL) {
        return;
    }
    pl->op = find_op(tok);
    if (pl->op == NULL) {
        pl->label = tok;
        tok = next_token(&cursor);
        if (tok == NULL) {
            return;                 /* a label on a line of its own */
        }
        pl->op = find_op(tok);
    }
    pl->op_text = tok;
    if (pl->op == NULL) {
        return;
    }
    if (pl->op->kind == OP_STRINGZ) {
        parse_string_literal(cursor, pl);
        return;
    }
    while ((tok = next_token(&cursor)) != NULL) {
        if (pl->operand_count < MAX_OPERANDS) {
            pl->operand[pl->operand_count] = tok;
        }
        pl->operand_count++;
    }
}

/*
 * Number of words a line occupies.  Both passes use this function, so label
 * positions and PC-relative offsets always agree, even for malformed lines.
 */
static int line_word_count(const ParsedLine *pl)
{
    if (pl->op == NULL) {
        return 0;
    }
    switch (pl->op->kind) {
    case OP_END:
        return 0;
    case OP_BLKW: {
        long count;
        int is_hex;
        if (pl->operand_count == 1
            && parse_number(pl->operand[0], &count, &is_hex)
            && count >= 1 && count <= 0xFFFF) {
            return (int)count;
        }
        return 1;
    }
    case OP_STRINGZ:
        return pl->string != NULL ? (int)pl->string_len + 1 : 1;
    default:
        return 1;
    }
}

/* Buffer for count words, each WORD_BITS characters, joined by '\n'. */
static char *alloc_words(size_t count)
{
    return xmalloc(count * (WORD_BITS + 1) + 1);
}

/* Store word number index of count into a buffer from alloc_words(). */
static void store_word(char *out, size_t index, size_t count, const char *word)
{
    char *slot = out + index * (WORD_BITS + 1);
    memcpy(slot, word, WORD_BITS);
    slot[WORD_BITS] = (index + 1 < count) ? '\n' : '\0';
}

static int expect_operands(const ParsedLine *pl, int wanted, ErrorText *err)
{
    if (pl->operand_count == wanted) {
        return 1;
    }
    snprintf(err->text, sizeof err->text,
             "%s expects %d operand(s) but %d were given",
             pl->op->name, wanted, pl->operand_count);
    return 0;
}

/* Append width bits of value to word; the caller guarantees there is room. */
static void add_field(char *word, long value, int width)
{
    char *bits = to_binary((int)value, width);
    strcat(word, bits);
    free(bits);
}

static int add_register(char *word, char *tok, ErrorText *err)
{
    const char *bits = reg_num(tok);
    if (bits == NULL) {
        snprintf(err->text, sizeof err->text,
                 "'%s' is not a register (R0-R7)", tok);
        return 0;
    }
    strcat(word, bits);
    return 1;
}

static int add_immediate(char *word, const char *tok, int width, ErrorText *err)
{
    long value;
    int is_hex;
    if (!parse_number(tok, &value, &is_hex)) {
        snprintf(err->text, sizeof err->text,
                 "'%s' is not a numeric literal", tok);
        return 0;
    }
    if (!fits_field(value, width, is_hex)) {
        snprintf(err->text, sizeof err->text,
                 "'%s' does not fit in %d bits", tok, width);
        return 0;
    }
    add_field(word, value, width);
    return 1;
}

/* Last operand of ADD/AND: a register (bits 000 + SR2) or an imm5 (bit 1 + imm5). */
static int add_second_source(char *word, char *tok, ErrorText *err)
{
    const char *bits = reg_num(tok);
    if (bits != NULL) {
        strcat(word, "000");
        strcat(word, bits);
        return 1;
    }
    long value;
    int is_hex;
    if (!parse_number(tok, &value, &is_hex)) {
        snprintf(err->text, sizeof err->text,
                 "'%s' is neither a register nor an immediate", tok);
        return 0;
    }
    strcat(word, "1");
    return add_immediate(word, tok, 5, err);
}

/*
 * Append a PC-relative offset.  A defined label wins over a numeric reading of
 * the same token; the offset is relative to the incremented PC (line + 1).
 */
static int add_pc_offset(char *word, const char *tok, int line,
                         const Lable *head, int width, ErrorText *err)
{
    const Lable *target = find_label(head, tok);
    if (target != NULL) {
        long offset = (long)target->pos - 1 - line;
        if (!fits_field(offset, width, 0)) {
            snprintf(err->text, sizeof err->text,
                     "label '%s' is too far away for a %d-bit offset", tok, width);
            return 0;
        }
        add_field(word, offset, width);
        return 1;
    }
    long value;
    int is_hex;
    if (!parse_number(tok, &value, &is_hex)) {
        snprintf(err->text, sizeof err->text, "undefined label '%s'", tok);
        return 0;
    }
    return add_immediate(word, tok, width, err);
}

/* .ORIG value / .FILL value-or-label: one 16-bit data word. */
static char *encode_value_word(const ParsedLine *pl, const Lable *head, ErrorText *err)
{
    if (!expect_operands(pl, 1, err)) {
        return NULL;
    }
    const char *tok = pl->operand[0];
    long value;
    int is_hex;
    const Lable *target =
        (pl->op->kind == OP_FILL) ? find_label(head, tok) : NULL;

    if (target != NULL) {
        if (origin_address < 0) {
            snprintf(err->text, sizeof err->text,
                     ".FILL with a label needs a preceding .ORIG");
            return NULL;
        }
        value = origin_address + target->pos - 2;
    } else if (!parse_number(tok, &value, &is_hex)) {
        snprintf(err->text, sizeof err->text,
                 "'%s' is not a numeric literal", tok);
        return NULL;
    }
    long lowest = (pl->op->kind == OP_ORIG || target != NULL) ? 0 : -0x8000L;
    if (value < lowest || value > 0xFFFF) {
        snprintf(err->text, sizeof err->text,
                 "'%s' does not fit in %d bits", tok, WORD_BITS);
        return NULL;
    }
    if (pl->op->kind == OP_ORIG) {
        origin_address = value;
    }
    return to_binary((int)value, WORD_BITS);
}

/* .BLKW n: n zero words. */
static char *encode_block(const ParsedLine *pl, ErrorText *err)
{
    long count;
    int is_hex;
    if (!expect_operands(pl, 1, err)) {
        return NULL;
    }
    if (!parse_number(pl->operand[0], &count, &is_hex)
        || count < 1 || count > 0xFFFF) {
        snprintf(err->text, sizeof err->text,
                 "'%s' is not a valid .BLKW word count", pl->operand[0]);
        return NULL;
    }
    char *out = alloc_words((size_t)count);
    for (size_t i = 0; i < (size_t)count; i++) {
        store_word(out, i, (size_t)count, ZERO_WORD);
    }
    return out;
}

/* .STRINGZ "text": one word per character followed by a zero word. */
static char *encode_string(const ParsedLine *pl, ErrorText *err)
{
    if (pl->string == NULL) {
        snprintf(err->text, sizeof err->text,
                 ".STRINGZ expects one double-quoted string");
        return NULL;
    }
    size_t count = pl->string_len + 1;
    char *out = alloc_words(count);
    for (size_t i = 0; i < pl->string_len; i++) {
        char *bits = word_ascii(pl->string[i]);
        store_word(out, i, count, bits);
        free(bits);
    }
    store_word(out, pl->string_len, count, ZERO_WORD);
    return out;
}

/*
 * Encode one parsed line.  Returns a heap string holding zero or more words
 * separated by '\n' (empty when the line emits nothing), or NULL with a
 * message in err.
 */
static char *encode_line(const ParsedLine *pl, int line, const Lable *head,
                         ErrorText *err)
{
    if (pl->op == NULL) {
        if (pl->op_text == NULL) {
            return copy_string("");     /* blank, comment or label only */
        }
        snprintf(err->text, sizeof err->text,
                 "unknown instruction '%s'", pl->op_text);
        return NULL;
    }

    /* Every case below appends exactly WORD_BITS - strlen(bits) characters. */
    char word[WORD_BITS + 1];
    strcpy(word, pl->op->bits);
    int ok = 0;

    switch (pl->op->kind) {
    case OP_END:
        return expect_operands(pl, 0, err) ? copy_string("") : NULL;
    case OP_ORIG:
    case OP_FILL:
        return encode_value_word(pl, head, err);
    case OP_BLKW:
        return encode_block(pl, err);
    case OP_STRINGZ:
        return encode_string(pl, err);
    case OP_ARITH:
        ok = expect_operands(pl, 3, err)
             && add_register(word, pl->operand[0], err)
             && add_register(word, pl->operand[1], err)
             && add_second_source(word, pl->operand[2], err);
        break;
    case OP_BRANCH:
        ok = expect_operands(pl, 1, err)
             && add_pc_offset(word, pl->operand[0], line, head, 9, err);
        break;
    case OP_BASE_REG:
        ok = expect_operands(pl, 1, err)
             && add_register(word, pl->operand[0], err);
        if (ok) {
            strcat(word, "000000");
        }
        break;
    case OP_JSR:
        ok = expect_operands(pl, 1, err)
             && add_pc_offset(word, pl->operand[0], line, head, 11, err);
        break;
    case OP_PC_REL:
        ok = expect_operands(pl, 2, err)
             && add_register(word, pl->operand[0], err)
             && add_pc_offset(word, pl->operand[1], line, head, 9, err);
        break;
    case OP_BASE_OFF:
        ok = expect_operands(pl, 3, err)
             && add_register(word, pl->operand[0], err)
             && add_register(word, pl->operand[1], err)
             && add_immediate(word, pl->operand[2], 6, err);
        break;
    case OP_NOT:
        ok = expect_operands(pl, 2, err)
             && add_register(word, pl->operand[0], err)
             && add_register(word, pl->operand[1], err);
        if (ok) {
            strcat(word, "111111");
        }
        break;
    case OP_TRAP: {
        long vector;
        int is_hex;
        ok = expect_operands(pl, 1, err);
        if (ok && (!parse_number(pl->operand[0], &vector, &is_hex)
                   || vector < 0 || vector > 0xFF)) {
            snprintf(err->text, sizeof err->text,
                     "'%s' is not a valid trap vector (0-255)", pl->operand[0]);
            ok = 0;
        }
        if (ok) {
            add_field(word, vector, 8);
        }
        break;
    }
    case OP_FIXED:
        ok = expect_operands(pl, 0, err);
        break;
    }
    return ok ? copy_string(word) : NULL;
}

int main(int argc, char *argv[])
{
    /* Command-line argument parsing */
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <input_file.asm> <output_file.txt>\n", argv[0]);
        return 1;
    }

    char lines[MAX_LINES][MAX_LINE_LENGTH];
    int num_lines = 0;
    read_asm_file(argv[1], lines, &num_lines);

    /* assemble() replaces every used entry with a heap string. */
    char *output[MAX_LINES] = {0};
    assemble(lines, num_lines, output);
    write_output_file(argv[2], (const char **)output, num_lines);

    for (int i = 0; i < MAX_LINES; i++) {
        free(output[i]);
    }
    return 0;
}

/*
 * Read a source file into lines[], appending from index *num_lines and
 * echoing every line to stdout.  Exits with status 1 when the file cannot be
 * opened or holds more than MAX_LINES lines.  Lines longer than
 * MAX_LINE_LENGTH - 1 characters are truncated with a warning.
 */
void read_asm_file(const char *filename, char lines[][MAX_LINE_LENGTH], int *num_lines)
{
    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        fprintf(stderr, "Unable to open file: %s\n", filename);
        exit(1);
    }

    char line[MAX_LINE_LENGTH];
    while (fgets(line, sizeof line, file) != NULL) {
        if (*num_lines >= MAX_LINES) {
            fprintf(stderr, "Too many lines in %s (the limit is %d)\n",
                    filename, MAX_LINES);
            fclose(file);
            exit(1);
        }
        size_t len = strlen(line);
        if (len == sizeof line - 1 && line[len - 1] != '\n') {
            /* Drop the rest of the physical line so it is not read as a new line. */
            int c = fgetc(file);
            if (c != EOF && c != '\n') {
                fprintf(stderr,
                        "Warning: line %d is longer than %d characters; the rest is ignored\n",
                        *num_lines + 1, MAX_LINE_LENGTH - 1);
                while (c != EOF && c != '\n') {
                    c = fgetc(file);
                }
            }
        }
        strcpy(lines[*num_lines], line);
        printf("%s", lines[*num_lines]);
        (*num_lines)++;
    }

    fclose(file);
}

/*
 * Write the assembled words to filename.  Entries that are NULL or empty
 * (lines that produce no machine code) are skipped.  Exits with status 1
 * when the file cannot be opened or written.
 */
void write_output_file(const char *filename, const char *output[], int num_lines)
{
    FILE *file = fopen(filename, "w");
    if (file == NULL) {
        fprintf(stderr, "Unable to open file: %s\n", filename);
        exit(1);
    }

    for (int i = 0; i < num_lines; i++) {
        if (output[i] == NULL || output[i][0] == '\0') {
            continue;
        }
        fprintf(file, "%s\n", output[i]);
    }

    int failed = ferror(file);
    if (fclose(file) != 0 || failed) {
        fprintf(stderr, "Unable to write file: %s\n", filename);
        exit(1);
    }
}

/*
 * Run both passes over lines[0..num_lines).
 *
 * Each output[i] must be NULL or a malloc'ed pointer on entry; it is freed
 * and replaced by a heap string with the words for line i (possibly empty).
 * The caller frees the entries.  Non-empty results are echoed to stdout.
 * If any line cannot be assembled, the errors are reported on stderr and the
 * program exits with status 1.
 */
void assemble(char lines[][MAX_LINE_LENGTH], int num_lines, char *output[])
{
    int failures = 0;

    trans_line = 1;
    origin_address = -1;

    Lable *labels = get_lable_pos(lines, num_lines);    /* pass 1 */

    for (int i = 0; i < num_lines; i++) {               /* pass 2 */
        char *code = translate_instruction(lines[i], i + trans_line, labels);
        if (code == NULL) {
            failures++;
            code = copy_string("");
        }
        free(output[i]);
        output[i] = code;
        if (code[0] != '\0') {
            printf("\n%s", code);
        }
    }

    free_labels(labels);

    if (failures > 0) {
        fprintf(stderr, "%d line(s) could not be assembled\n", failures);
        exit(1);
    }
}

/*
 * Translate one source line.
 *
 * instruction: the source text, with or without a trailing newline.
 * line:        1-based word position of the line (see file header).
 * head:        label list returned by get_lable_pos().
 *
 * Returns a heap string the caller must free: the 16-bit words of the line
 * separated by '\n', or an empty string when the line emits nothing.  Returns
 * NULL after printing a diagnostic when the line is invalid.  As a side
 * effect, trans_line is advanced by (words occupied - 1).
 */
char *translate_instruction(char *instruction, int line, Lable *head)
{
    if (instruction == NULL) {
        return NULL;
    }

    ParsedLine parsed;
    ErrorText err = {""};
    parse_line(instruction, &parsed);

    char *machine_code = encode_line(&parsed, line, head, &err);
    trans_line += line_word_count(&parsed) - 1;

    if (machine_code == NULL) {
        fprintf(stderr, "Error: %s\n    %.*s\n", err.text,
                (int)strcspn(instruction, "\r\n"), instruction);
    }
    free(parsed.buf);
    return machine_code;
}

/* 16-bit binary string for one character; the caller frees the result. */
char *word_ascii(char word)
{
    /* Go through unsigned char so bytes above 127 are not sign-extended. */
    return to_binary((unsigned char)word, WORD_BITS);
}

/*
 * Pass 1: build the label table for lines[0..line_index).
 *
 * Returns a list whose first node is a dummy head with an empty name; the
 * real labels follow, most recently defined first.  A label's pos is the
 * word position of the line it is attached to.  A duplicate definition is
 * reported as a warning and shadows the earlier one.
 */
Lable *get_lable_pos(char lines[][MAX_LINE_LENGTH], int line_index)
{
    Lable *lable_head = xmalloc(sizeof *lable_head);
    lable_head->name = copy_string("");
    lable_head->pos = 0;
    lable_head->next = NULL;

    int pos = 1;
    for (int k = 0; k < line_index; k++) {
        ParsedLine parsed;
        parse_line(lines[k], &parsed);

        if (parsed.label != NULL) {
            if (find_label(lable_head, parsed.label) != NULL) {
                fprintf(stderr, "Warning: label '%s' is defined more than once\n",
                        parsed.label);
            }
            Lable *node = xmalloc(sizeof *node);
            node->name = copy_string(parsed.label);
            node->pos = pos;
            node->next = lable_head->next;
            lable_head->next = node;
        }

        pos += line_word_count(&parsed);
        free(parsed.buf);
    }
    return lable_head;
}

/*
 * Map a register name ("R0".."R7", any case, optionally followed by one
 * comma) to its 3-bit code.  Returns a pointer to static storage that must
 * not be freed, or NULL when str is not a register.
 */
char *reg_num(char *str)
{
    static char codes[8][4] = {
        "000", "001", "010", "011", "100", "101", "110", "111"
    };

    if (str == NULL || (str[0] != 'R' && str[0] != 'r')) {
        return NULL;
    }
    if (str[1] < '0' || str[1] > '7') {
        return NULL;
    }
    if (str[2] != '\0' && !(str[2] == ',' && str[3] == '\0')) {
        return NULL;
    }
    return codes[str[1] - '0'];
}

/*
 * Split source at every occurrence of flag, dropping empty pieces.
 * Returns a NULL-terminated heap array of heap strings; the caller frees
 * each string and then the array.
 */
char **my_strtok(const char *source, char flag)
{
    size_t len = (source != NULL) ? strlen(source) : 0;
    /* At most len / 2 + 1 non-empty pieces, plus the terminating NULL. */
    char **pieces = xmalloc((len / 2 + 2) * sizeof *pieces);
    size_t count = 0;
    size_t i = 0;

    while (i < len) {
        while (i < len && source[i] == flag) {
            i++;
        }
        size_t start = i;
        while (i < len && source[i] != flag) {
            i++;
        }
        if (i > start) {
            char *piece = xmalloc(i - start + 1);
            memcpy(piece, source + start, i - start);
            piece[i - start] = '\0';
            pieces[count++] = piece;
        }
    }
    pieces[count] = NULL;
    return pieces;
}

/* Reverse the characters in [left, right] in place. */
void reverse(char *left, char *right)
{
    while (left < right) {
        char tmp = *right;
        *right = *left;
        *left = tmp;
        left++;
        right--;
    }
}

/*
 * Return the low m bits of n in two's complement as a string of '0'/'1',
 * most significant bit first.  The caller frees the result.  Values that do
 * not fit are truncated; m <= 0 yields an empty string.
 */
char *to_binary(int n, int m)
{
    if (m < 0) {
        m = 0;
    }
    char *bits = xmalloc((size_t)m + 1);
    unsigned long long value = (unsigned long long)(long long)n;
    const int value_bits = (int)(sizeof value * CHAR_BIT);

    /* Emit least significant bit first, then flip the string. */
    for (int i = 0; i < m; i++) {
        int bit = (i < value_bits) ? (int)((value >> i) & 1u) : (n < 0);
        bits[i] = (char)('0' + bit);
    }
    bits[m] = '\0';
    if (m > 1) {
        reverse(bits, bits + m - 1);
    }
    return bits;
}

/*
 * Convert a string of hexadecimal digits (no prefix) to its value.
 * Returns -1 for NULL, an invalid digit, or a value that overflows long.
 */
long hexToDec(char *source)
{
    if (source == NULL) {
        return -1;
    }
    long sum = 0;
    for (const char *p = source; *p != '\0'; p++) {
        int digit = getIndexOfSigns(*p);
        if (digit < 0 || sum > (LONG_MAX - digit) / 16) {
            return -1;
        }
        sum = sum * 16 + digit;
    }
    return sum;
}

/* Value of one hexadecimal digit, or -1 when ch is not one. */
int getIndexOfSigns(char ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'A' && ch <= 'F') {
        return ch - 'A' + 10;
    }
    if (ch >= 'a' && ch <= 'f') {
        return ch - 'a' + 10;
    }
    return -1;
}

/*
 * Convert a literal ("x1F", "#-3" or a plain decimal) to a bit-wide binary
 * string.  The caller frees the result.  Returns NULL when str is not a
 * valid literal; values wider than bit are truncated.
 */
char *bin_transfer(char *str, int bit)
{
    long value;
    int is_hex;
    if (!parse_number(str, &value, &is_hex)) {
        return NULL;
    }
    if (value < INT_MIN || value > INT_MAX) {
        return NULL;
    }
    return to_binary((int)value, bit);
}