義烏網(wǎng)站建設(shè)微信開(kāi)發(fā)2023年6月份疫情嚴(yán)重嗎
由于編譯原理課的Lab1為自制詞法分析器,所以筆者用C++實(shí)現(xiàn)了一個(gè)極簡(jiǎn)的C語(yǔ)言詞法分析器,用于分析C語(yǔ)言源代碼。它可以處理關(guān)鍵字、標(biāo)識(shí)符、整數(shù)、實(shí)數(shù)、浮點(diǎn)數(shù)的科學(xué)計(jì)數(shù)法表示、運(yùn)算符、分隔符、字符串字面量、字符字面量、注釋和預(yù)處理指令。請(qǐng)注意,此版本的詞法分析器不是很完善,但它應(yīng)該能夠處理大多數(shù)簡(jiǎn)單的C語(yǔ)言源代碼。
用戶輸入輸入文件名和輸出文件名,然后檢查這些文件是否可以正確打開(kāi)。然后,我們從輸入文件中讀取內(nèi)容,對(duì)其進(jìn)行詞法分析,并將結(jié)果寫(xiě)入輸出文件中。最后,我們通知用戶詞法分析已完成,并提示用戶查看輸出文件以獲取結(jié)果。
mylexer.cpp 文件:
詞法分析器核心文件
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <unordered_set>

using namespace std;

// Token categories produced by the lexer.
enum class TokenType
{
    Keyword,
    Identifier,
    Integer,
    Real,
    Operator,
    Separator,
    StringLiteral,
    CharLiteral,
    Comment,
    Preprocessor,
    Unknown
};

// A single lexical token: its category plus the matched text.
struct Token
{
    TokenType type;
    string value;
};

// Returns true if `value` is one of the 32 C89 keywords.
bool isKeyword(const string &value)
{
    static const unordered_set<string> keywords = {
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
        "long", "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"};
    return keywords.find(value) != keywords.end();
}

// Returns true if `c` is a single-character C operator.
// Note: multi-character operators (==, <=, ++, ...) are emitted as
// consecutive single-character Operator tokens by lex().
bool isOperator(char c)
{
    static const unordered_set<char> operators = {
        '+', '-', '*', '/', '%', '>', '<', '=', '&', '|', '!', '~', '^', '?', ':'};
    return operators.find(c) != operators.end();
}

// Returns true if `c` is a separator/punctuation character.
// '#' is listed here so lex() can branch into preprocessor handling.
bool isSeparator(char c)
{
    static const unordered_set<char> separators = {
        '(', ')', '[', ']', '{', '}', ',', ';', '.', '#'};
    return separators.find(c) != separators.end();
}

// Scans the whole source text `input` and returns the token stream.
// Handles keywords, identifiers, integer/real literals (with exponents and
// u/l suffixes), operators, separators, string/char literals, // and /* */
// comments, and preprocessor directive names. Whitespace and any character
// it does not recognize are silently skipped.
vector<Token> lex(const string &input)
{
    vector<Token> tokens;
    string buffer; // accumulates identifier/keyword characters

    // Emit the buffered word as a Keyword or Identifier token and clear it.
    auto flushBuffer = [&]()
    {
        if (!buffer.empty())
        {
            tokens.push_back({isKeyword(buffer) ? TokenType::Keyword : TokenType::Identifier, buffer});
            buffer.clear();
        }
    };

    size_t i = 0;
    while (i < input.length())
    {
        char c = input[i];
        // Identifier characters: letters and '_' always; digits are accepted
        // only after the first character (fix: the original rejected digits
        // entirely and split "a1" into Identifier "a" + Integer "1").
        // Casts to unsigned char avoid UB in <cctype> for negative chars
        // (e.g. multibyte UTF-8 input).
        if (isalpha((unsigned char)c) || c == '_' || (!buffer.empty() && isdigit((unsigned char)c)))
        {
            buffer.push_back(c);
            i++;
        }
        else
        {
            flushBuffer();
            if (isdigit((unsigned char)c))
            {
                // Numeric literal: digits with optional '.', exponent part,
                // and trailing u/U/l/L suffixes.
                string number;
                number.push_back(c);
                i++;
                while (i < input.length() &&
                       (isdigit((unsigned char)input[i]) || input[i] == '.' || tolower((unsigned char)input[i]) == 'e'))
                {
                    number.push_back(input[i]);
                    // An exponent marker may be followed by an explicit sign.
                    if (tolower((unsigned char)input[i]) == 'e' && i + 1 < input.length() &&
                        (input[i + 1] == '+' || input[i + 1] == '-'))
                    {
                        number.push_back(input[++i]);
                    }
                    i++;
                }
                while (i < input.length() &&
                       (tolower((unsigned char)input[i]) == 'u' || tolower((unsigned char)input[i]) == 'l'))
                {
                    number.push_back(input[i]);
                    i++;
                }
                // A '.' or exponent marker makes it a Real, otherwise Integer.
                bool isReal = number.find('.') != string::npos ||
                              number.find('e') != string::npos ||
                              number.find('E') != string::npos;
                tokens.push_back({isReal ? TokenType::Real : TokenType::Integer, number});
            }
            else if (isOperator(c))
            {
                if (c == '/' && i + 1 < input.length() && input[i + 1] == '/')
                {
                    // Line comment: everything up to (not including) the newline.
                    i += 2;
                    string comment;
                    while (i < input.length() && input[i] != '\n')
                    {
                        comment.push_back(input[i]);
                        i++;
                    }
                    tokens.push_back({TokenType::Comment, comment});
                }
                else if (c == '/' && i + 1 < input.length() && input[i + 1] == '*')
                {
                    // Block comment: everything up to the matching "*/".
                    i += 2;
                    string comment;
                    while (i + 1 < input.length() && !(input[i] == '*' && input[i + 1] == '/'))
                    {
                        comment.push_back(input[i]);
                        i++;
                    }
                    if (i + 1 < input.length())
                    {
                        i += 2; // skip the closing "*/"
                    }
                    else
                    {
                        // Unterminated comment: consume the rest of the input
                        // (fix: the original left `i` at len-1 and re-lexed
                        // the final character).
                        i = input.length();
                    }
                    tokens.push_back({TokenType::Comment, comment});
                }
                else
                {
                    // Fix: the original only reached this branch when '/' was
                    // the last character; "a / b" matched the outer '/' test,
                    // matched neither comment branch, never advanced `i`, and
                    // looped forever.
                    tokens.push_back({TokenType::Operator, string(1, c)});
                    i++;
                }
            }
            else if (isSeparator(c))
            {
                if (c == '#')
                {
                    // Preprocessor directive: '#' followed by the directive
                    // name (the '#' itself is not kept in the token value).
                    string preprocessor;
                    i++;
                    while (i < input.length() && (isalnum((unsigned char)input[i]) || input[i] == '_'))
                    {
                        preprocessor.push_back(input[i]);
                        i++;
                    }
                    tokens.push_back({TokenType::Preprocessor, preprocessor});
                }
                else
                {
                    tokens.push_back({TokenType::Separator, string(1, c)});
                    i++;
                }
            }
            else if (c == '\"')
            {
                // String literal: quotes stripped, escape sequences kept verbatim.
                string str_literal;
                i++;
                while (i < input.length() && input[i] != '\"')
                {
                    if (input[i] == '\\' && i + 1 < input.length())
                    {
                        str_literal.push_back(input[i]);
                        i++;
                    }
                    str_literal.push_back(input[i]);
                    i++;
                }
                i++; // skip the closing quote
                tokens.push_back({TokenType::StringLiteral, str_literal});
            }
            else if (c == '\'')
            {
                // Character literal, possibly an escape sequence.
                string char_literal;
                i++;
                if (i < input.length())
                {
                    if (input[i] == '\\' && i + 1 < input.length())
                    {
                        char_literal.push_back(input[i]);
                        i++;
                    }
                    char_literal.push_back(input[i]);
                    i++;
                }
                i++; // skip the closing quote
                tokens.push_back({TokenType::CharLiteral, char_literal});
            }
            else
            {
                i++; // whitespace or unsupported character: skip it
            }
        }
    }
    flushBuffer(); // input may end in the middle of an identifier
    return tokens;
}

// Prompts for an input and an output file name, lexes the input file, and
// writes one "Token type: <kind>, Value: <text>" line per token.
// Returns 0 on success, 1 if either file cannot be opened.
int main()
{
    string input_filename;
    string output_filename;
    cout << "Enter the input file name: ";
    cin >> input_filename;
    cout << "Enter the output file name: ";
    cin >> output_filename;

    ifstream infile(input_filename);
    ofstream outfile(output_filename);
    if (!infile)
    {
        cerr << "Error opening the input file!" << endl;
        return 1;
    }
    if (!outfile)
    {
        cerr << "Error opening the output file!" << endl;
        return 1;
    }

    // Slurp the whole file into one string and tokenize it.
    string input((istreambuf_iterator<char>(infile)), istreambuf_iterator<char>());
    auto tokens = lex(input);

    for (const auto &token : tokens)
    {
        outfile << "Token type: ";
        switch (token.type)
        {
        case TokenType::Keyword:
            outfile << "Keyword";
            break;
        case TokenType::Identifier:
            outfile << "Identifier";
            break;
        case TokenType::Integer:
            outfile << "Integer";
            break;
        case TokenType::Real:
            outfile << "Real";
            break;
        case TokenType::Operator:
            outfile << "Operator";
            break;
        case TokenType::Separator:
            outfile << "Separator";
            break;
        case TokenType::StringLiteral:
            outfile << "StringLiteral";
            break;
        case TokenType::CharLiteral:
            outfile << "CharLiteral";
            break;
        case TokenType::Comment:
            outfile << "Comment";
            break;
        case TokenType::Preprocessor:
            outfile << "Preprocessor";
            break;
        case TokenType::Unknown:
            outfile << "Unknown";
            break;
        }
        // '\n' instead of endl: same file contents, no flush per token.
        outfile << ", Value: " << token.value << '\n';
    }
    cout << "Lexical analysis complete." << endl;
    return 0;
}
input.c 文件:
用于詞法分析器的輸入文件
#include <stdio.h>
#define N 6

int main()
{
    // Single-Line Comments
    int a = 0;
    double b = 1.5;
    long c = 100L;
    char d = 'd';
    char s[6] = "hello";
    /*Multiline comment
    Multiline comment*/
    if (a > 0)
    {
        printf("%s", s);
    }
    else
    {
        c = a + N;
    }
    return 0;
}
output.txt 文件:
?詞法分析器的輸出結(jié)果
Token type: Preprocessor, Value: include
Token type: Operator, Value: <
Token type: Identifier, Value: stdio
Token type: Separator, Value: .
Token type: Identifier, Value: h
Token type: Operator, Value: >
Token type: Preprocessor, Value: define
Token type: Identifier, Value: N
Token type: Integer, Value: 6
Token type: Keyword, Value: int
Token type: Identifier, Value: main
Token type: Separator, Value: (
Token type: Separator, Value: )
Token type: Separator, Value: {
Token type: Comment, Value: Single-Line Comments
Token type: Keyword, Value: int
Token type: Identifier, Value: a
Token type: Operator, Value: =
Token type: Integer, Value: 0
Token type: Separator, Value: ;
Token type: Keyword, Value: double
Token type: Identifier, Value: b
Token type: Operator, Value: =
Token type: Real, Value: 1.5
Token type: Separator, Value: ;
Token type: Keyword, Value: long
Token type: Identifier, Value: c
Token type: Operator, Value: =
Token type: Integer, Value: 100L
Token type: Separator, Value: ;
Token type: Keyword, Value: char
Token type: Identifier, Value: d
Token type: Operator, Value: =
Token type: CharLiteral, Value: d
Token type: Separator, Value: ;
Token type: Keyword, Value: char
Token type: Identifier, Value: s
Token type: Separator, Value: [
Token type: Integer, Value: 6
Token type: Separator, Value: ]
Token type: Operator, Value: =
Token type: StringLiteral, Value: hello
Token type: Separator, Value: ;
Token type: Comment, Value: Multiline comment
Multiline comment
Token type: Keyword, Value: if
Token type: Separator, Value: (
Token type: Identifier, Value: a
Token type: Operator, Value: >
Token type: Integer, Value: 0
Token type: Separator, Value: )
Token type: Separator, Value: {
Token type: Identifier, Value: printf
Token type: Separator, Value: (
Token type: StringLiteral, Value: %s
Token type: Separator, Value: ,
Token type: Identifier, Value: s
Token type: Separator, Value: )
Token type: Separator, Value: ;
Token type: Separator, Value: }
Token type: Keyword, Value: else
Token type: Separator, Value: {
Token type: Identifier, Value: c
Token type: Operator, Value: =
Token type: Identifier, Value: a
Token type: Operator, Value: +
Token type: Identifier, Value: N
Token type: Separator, Value: ;
Token type: Separator, Value: }
Token type: Keyword, Value: return
Token type: Integer, Value: 0
Token type: Separator, Value: ;
Token type: Separator, Value: }
注:在mylexer.cpp中,筆者定義了一個(gè)名為flushBuffer的Lambda函數(shù),它將buffer中的內(nèi)容添加到tokens向量,并清空buffer。
下面來(lái)詳細(xì)解釋一下這個(gè)Lambda函數(shù):
auto flushBuffer:我們使用auto關(guān)鍵字來(lái)定義一個(gè)名為flushBuffer的變量,它將存儲(chǔ)我們的Lambda表達(dá)式。auto關(guān)鍵字告訴編譯器根據(jù)Lambda表達(dá)式的類(lèi)型自動(dòng)推導(dǎo)flushBuffer的類(lèi)型。
[&]():這是Lambda表達(dá)式的開(kāi)頭部分,方括號(hào)[]內(nèi)表示Lambda函數(shù)的捕獲說(shuō)明符。在這個(gè)例子中,我們使用&表示按引用捕獲所有外部變量。這意味著在Lambda函數(shù)內(nèi)部,我們可以訪問(wèn)并修改外部作用域中的變量,例如buffer和tokens。括號(hào)()表示Lambda函數(shù)沒(méi)有參數(shù)。
{}:這是Lambda函數(shù)的主體,大括號(hào){}內(nèi)包含了函數(shù)的實(shí)現(xiàn)。在這個(gè)例子中,我們檢查buffer是否為空,如果不為空,我們將buffer中的內(nèi)容添加到tokens向量,并清空buffer。
C++中的lambda表達(dá)式是一種創(chuàng)建匿名函數(shù)對(duì)象的便捷方式。自C++11起,lambda表達(dá)式成為了C++的一部分。它們通常用于定義簡(jiǎn)短的函數(shù),可以直接在需要使用它們的地方定義。Lambda表達(dá)式的語(yǔ)法如下:
[capture](parameters) -> return_type { function_body }
- capture:捕獲列表,用于捕獲來(lái)自定義lambda的作用域內(nèi)的變量。捕獲列表可以按值或按引用捕獲變量。
- parameters:函數(shù)參數(shù)列表,與常規(guī)函數(shù)參數(shù)列表類(lèi)似。
- return_type:返回類(lèi)型(可選)。如果省略此部分,編譯器會(huì)自動(dòng)推導(dǎo)返回類(lèi)型(通常為void或單個(gè)?return 語(yǔ)句的類(lèi)型)。
- function_body:函數(shù)體,包含實(shí)現(xiàn)所需功能的代碼。
只看上面的概念還是太抽象了,我們舉個(gè)簡(jiǎn)單的例子,來(lái)直觀地感受一下Lambda表達(dá)式
#include <iostream>
#include <vector>
#include <algorithm>

using namespace std;

int main()
{
    vector<int> values = {1, 2, 3, 4, 5};
    int factor = 3;

    // Multiply every element of the vector by `factor`, using a lambda
    // that captures `factor` by value.
    for_each(values.begin(), values.end(), [factor](int &number)
             { number *= factor; });

    // Print the scaled elements.
    for (const auto &number : values)
    {
        cout << number << " ";
    }
    return 0;
}
輸出結(jié)果為:
3 6 9 12 15