0001 ========================================
0002 IPF Machine Check (MC) error inject tool
0003 ========================================
0004
0005 IPF Machine Check (MC) error inject tool is used to inject MC
0006 errors from Linux. The tool is a test bed for IPF MC work flow including
0007 hardware correctable error handling, OS recoverable error handling, MC
0008 event logging, etc.
0009
0010 The tool includes two parts: a kernel driver and a user application
0011 sample. The driver provides an interface to PAL to inject errors
0012 and to query error injection capabilities. The driver code is in
0013 arch/ia64/kernel/err_inject.c. The application sample (shown below)
0014 provides a combination of various errors and calls the driver's interface
0015 (sysfs interface) to inject errors or query error injection capabilities.
0016
0017 The tool can be used to test Intel IPF machine MC handling capabilities.
0018 It's especially useful for people who cannot access a hardware MC injection
0019 tool to inject errors. It's also very useful to integrate with other
0020 software test suites to do stress testing on IPF.
0021
0022 Below is a sample application as part of the whole tool. The sample
0023 can be used as a working test tool, or it can be expanded to include
0024 more features. It can also be integrated into a library or other user
0025 application for more thorough testing.
0026
0027 The sample application takes err.conf as error configuration input. GCC
0028 compiles the code. After you install err_inject driver, you can run
0029 this sample application to inject errors.
0030
0031 Errata: Itanium 2 Processors Specification Update lists some errata against
0032 the pal_mc_error_inject PAL procedure. The following err.conf has been tested
0033 on latest Montecito PAL.
0034
0035 err.conf::
0036
0037 #This is configuration file for err_inject_tool.
0038 #The format of each line is:
0039 #cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
0040 #where
0041 # cpu: logical cpu number the error will be injected on.
0042 # loop: number of times the error will be injected.
0043 # interval: in seconds; one error is injected per interval.
0044 # err_type_info, err_struct_info: PAL parameters.
0045 #
0046 #Note: All values are hex w/o or w/ 0x prefix.
0047
0048
0049 #On cpu2, inject only total 0x10 errors, interval 5 seconds
0050 #corrected, data cache, hier-2, physical addr(assigned by tool code).
0051 #working on Montecito latest PAL.
0052 2, 10, 5, 4101, 95
0053
0054 #On cpu4, inject and consume total 0x10 errors, interval 5 seconds
0055 #corrected, data cache, hier-2, physical addr(assigned by tool code).
0056 #working on Montecito latest PAL.
0057 4, 10, 5, 4109, 95
0058
0059 #On cpu15, inject and consume total 0x10 errors, interval 5 seconds
0060 #recoverable, DTR0, hier-2.
0061 #working on Montecito latest PAL.
0062 0xf, 0x10, 5, 4249, 15
0063
0064 The sample application source code:
0065
0066 err_injection_tool.c::
0067
0068 /*
0069 * This program is free software; you can redistribute it and/or modify
0070 * it under the terms of the GNU General Public License as published by
0071 * the Free Software Foundation; either version 2 of the License, or
0072 * (at your option) any later version.
0073 *
0074 * This program is distributed in the hope that it will be useful, but
0075 * WITHOUT ANY WARRANTY; without even the implied warranty of
0076 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
0077 * NON INFRINGEMENT. See the GNU General Public License for more
0078 * details.
0079 *
0080 * You should have received a copy of the GNU General Public License
0081 * along with this program; if not, write to the Free Software
0082 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0083 *
0084 * Copyright (C) 2006 Intel Co
0085 * Fenghua Yu <fenghua.yu@intel.com>
0086 *
0087 */
0088 #include <sys/types.h>
0089 #include <sys/stat.h>
0090 #include <fcntl.h>
0091 #include <stdio.h>
0092 #include <sched.h>
0093 #include <unistd.h>
0094 #include <stdlib.h>
0095 #include <stdarg.h>
0096 #include <string.h>
0097 #include <errno.h>
0098 #include <time.h>
0099 #include <sys/ipc.h>
0100 #include <sys/sem.h>
0101 #include <sys/wait.h>
0102 #include <sys/mman.h>
0103 #include <sys/shm.h>
0104
/* Sizes and limits shared by the whole tool. */
enum {
	MAX_FN_SIZE          = 256,		/* file-name buffers */
	MAX_BUF_SIZE         = 256,		/* text buffers for log and sysfs I/O */
	DATA_BUF_SIZE        = 256,
	NR_CPUS              = 512,		/* slots in the per-cpu tables */
	MAX_TASK_NUM         = 2048,		/* most injection tasks handled per run */
	MIN_INTERVAL         = 5,		/* seconds */
	ERR_DATA_BUFFER_SIZE = 3,		/* Three 8-byte. */
	PARA_FIELD_NUM       = 5,		/* mandatory fields of one parameter line */
	MASK_SIZE            = NR_CPUS / 64	/* affinity mask length, in 64-bit words */
};

/* Per-cpu sysfs directory of the err_inject driver; %d takes the cpu number. */
#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
0115
/*
 * Hand-written prototype matching the way this file calls it: a length in
 * bytes and a mask made of unsigned longs.
 * NOTE(review): presumably declared here because <sched.h> does not expose
 * it without extra feature macros - confirm before removing.
 */
int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);

/* Non-zero turns on the extra diagnostics printed through vbprintf(). */
int verbose;

/*
 * printf() that only produces output when verbose is set.
 *
 * Wrapped in do { } while (0) so that it is exactly one statement: the bare
 * "if (verbose) printf" form silently captured the "else" of an enclosing
 * unbraced if.
 */
#define vbprintf(...) do { if (verbose) printf(__VA_ARGS__); } while (0)
0120
0121 int log_info(int cpu, const char *fmt, ...)
0122 {
0123 FILE *log;
0124 char fn[MAX_FN_SIZE];
0125 char buf[MAX_BUF_SIZE];
0126 va_list args;
0127
0128 sprintf(fn, "%d.log", cpu);
0129 log=fopen(fn, "a+");
0130 if (log==NULL) {
0131 perror("Error open:");
0132 return -1;
0133 }
0134
0135 va_start(args, fmt);
0136 vprintf(fmt, args);
0137 memset(buf, 0, MAX_BUF_SIZE);
0138 vsprintf(buf, fmt, args);
0139 va_end(args);
0140
0141 fwrite(buf, sizeof(buf), 1, log);
0142 fclose(log);
0143
0144 return 0;
0145 }
0146
/*
 * Integer aliases used by the PAL parameter layouts in this file.
 * NOTE(review): u64 is only 64 bits wide where unsigned long is - confirm
 * for the intended build targets.
 */
typedef unsigned long u64;
typedef unsigned int u32;

/*
 * The err_type_info PAL parameter, accessible either as one raw word or
 * through its individual bit fields.
 */
typedef union err_type_info_u {
	struct {
		u64 mode        : 3;	/* bits 0-2 */
		u64 err_inj     : 3;	/* bits 3-5 */
		u64 err_sev     : 2;	/* bits 6-7 */
		u64 err_struct  : 5;	/* bits 8-12 */
		u64 struct_hier : 3;	/* bits 13-15 */
		u64 reserved    : 48;	/* bits 16-63 */
	} err_type_info_u;
	u64 err_type_info;
} err_type_info_t;
0161
0162 typedef union err_struct_info_u {
0163 struct {
0164 u64 siv : 1, /* 0 */
0165 c_t : 2, /* 1-2 */
0166 cl_p : 3, /* 3-5 */
0167 cl_id : 3, /* 6-8 */
0168 cl_dp : 1, /* 9 */
0169 reserved1 : 22, /* 10-31 */
0170 tiv : 1, /* 32 */
0171 trigger : 4, /* 33-36 */
0172 trigger_pl : 3, /* 37-39 */
0173 reserved2 : 24; /* 40-63 */
0174 } err_struct_info_cache;
0175 struct {
0176 u64 siv : 1, /* 0 */
0177 tt : 2, /* 1-2 */
0178 tc_tr : 2, /* 3-4 */
0179 tr_slot : 8, /* 5-12 */
0180 reserved1 : 19, /* 13-31 */
0181 tiv : 1, /* 32 */
0182 trigger : 4, /* 33-36 */
0183 trigger_pl : 3, /* 37-39 */
0184 reserved2 : 24; /* 40-63 */
0185 } err_struct_info_tlb;
0186 struct {
0187 u64 siv : 1, /* 0 */
0188 regfile_id : 4, /* 1-4 */
0189 reg_num : 7, /* 5-11 */
0190 reserved1 : 20, /* 12-31 */
0191 tiv : 1, /* 32 */
0192 trigger : 4, /* 33-36 */
0193 trigger_pl : 3, /* 37-39 */
0194 reserved2 : 24; /* 40-63 */
0195 } err_struct_info_register;
0196 struct {
0197 u64 reserved;
0198 } err_struct_info_bus_processor_interconnect;
0199 u64 err_struct_info;
0200 } err_struct_info_t;
0201
0202 typedef union err_data_buffer_u {
0203 struct {
0204 u64 trigger_addr; /* 0-63 */
0205 u64 inj_addr; /* 64-127 */
0206 u64 way : 5, /* 128-132 */
0207 index : 20, /* 133-152 */
0208 : 39; /* 153-191 */
0209 } err_data_buffer_cache;
0210 struct {
0211 u64 trigger_addr; /* 0-63 */
0212 u64 inj_addr; /* 64-127 */
0213 u64 way : 5, /* 128-132 */
0214 index : 20, /* 133-152 */
0215 reserved : 39; /* 153-191 */
0216 } err_data_buffer_tlb;
0217 struct {
0218 u64 trigger_addr; /* 0-63 */
0219 } err_data_buffer_register;
0220 struct {
0221 u64 reserved; /* 0-63 */
0222 } err_data_buffer_bus_processor_interconnect;
0223 u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
0224 } err_data_buffer_t;
0225
0226 typedef union capabilities_u {
0227 struct {
0228 u64 i : 1,
0229 d : 1,
0230 rv : 1,
0231 tag : 1,
0232 data : 1,
0233 mesi : 1,
0234 dp : 1,
0235 reserved1 : 3,
0236 pa : 1,
0237 va : 1,
0238 wi : 1,
0239 reserved2 : 20,
0240 trigger : 1,
0241 trigger_pl : 1,
0242 reserved3 : 30;
0243 } capabilities_cache;
0244 struct {
0245 u64 d : 1,
0246 i : 1,
0247 rv : 1,
0248 tc : 1,
0249 tr : 1,
0250 reserved1 : 27,
0251 trigger : 1,
0252 trigger_pl : 1,
0253 reserved2 : 30;
0254 } capabilities_tlb;
0255 struct {
0256 u64 gr_b0 : 1,
0257 gr_b1 : 1,
0258 fr : 1,
0259 br : 1,
0260 pr : 1,
0261 ar : 1,
0262 cr : 1,
0263 rr : 1,
0264 pkr : 1,
0265 dbr : 1,
0266 ibr : 1,
0267 pmc : 1,
0268 pmd : 1,
0269 reserved1 : 3,
0270 regnum : 1,
0271 reserved2 : 15,
0272 trigger : 1,
0273 trigger_pl : 1,
0274 reserved3 : 30;
0275 } capabilities_register;
0276 struct {
0277 u64 reserved;
0278 } capabilities_bus_processor_interconnect;
0279 } capabilities_t;
0280
0281 typedef struct resources_s {
0282 u64 ibr0 : 1,
0283 ibr2 : 1,
0284 ibr4 : 1,
0285 ibr6 : 1,
0286 dbr0 : 1,
0287 dbr2 : 1,
0288 dbr4 : 1,
0289 dbr6 : 1,
0290 reserved : 48;
0291 } resources_t;
0292
0293
/* Ask sysconf() for the page size and hand its answer back unchanged. */
long get_page_size(void)
{
	return sysconf(_SC_PAGESIZE);
}

/* Page size, falling back to 0x4000 when get_page_size() reports -1. */
#define PAGE_SIZE (get_page_size() == -1 ? 0x4000 : get_page_size())
/* Size of the shared segment: two pages for each of the NR_CPUS cpus. */
#define SHM_SIZE (2 * PAGE_SIZE * NR_CPUS)
/* Address requested from shmat() when the segment is attached. */
#define SHM_VA 0x2000000100000000

int shmid;	/* id of the shared memory segment, set by create_shm() */
void *shmaddr;	/* where the segment is attached, set by create_shm() */
0306
0307 int create_shm(void)
0308 {
0309 key_t key;
0310 char fn[MAX_FN_SIZE];
0311
0312 /* cpu0 is always existing */
0313 sprintf(fn, PATH_FORMAT, 0);
0314 if ((key = ftok(fn, 's')) == -1) {
0315 perror("ftok");
0316 return -1;
0317 }
0318
0319 shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
0320 if (shmid == -1) {
0321 if (errno==EEXIST) {
0322 shmid = shmget(key, SHM_SIZE, 0);
0323 if (shmid == -1) {
0324 perror("shmget");
0325 return -1;
0326 }
0327 }
0328 else {
0329 perror("shmget");
0330 return -1;
0331 }
0332 }
0333 vbprintf("shmid=%d", shmid);
0334
0335 /* connect to the segment: */
0336 shmaddr = shmat(shmid, (void *)SHM_VA, 0);
0337 if (shmaddr == (void*)-1) {
0338 perror("shmat");
0339 return -1;
0340 }
0341
0342 memset(shmaddr, 0, SHM_SIZE);
0343 mlock(shmaddr, SHM_SIZE);
0344
0345 return 0;
0346 }
0347
0348 int free_shm()
0349 {
0350 munlock(shmaddr, SHM_SIZE);
0351 shmdt(shmaddr);
0352 semctl(shmid, 0, IPC_RMID);
0353
0354 return 0;
0355 }
0356
0357 #ifdef _SEM_SEMUN_UNDEFINED
0358 union semun
0359 {
0360 int val;
0361 struct semid_ds *buf;
0362 unsigned short int *array;
0363 struct seminfo *__buf;
0364 };
0365 #endif
0366
0367 u32 mode=1; /* 1: physical mode; 2: virtual mode. */
0368 int one_lock=1;
0369 key_t key[NR_CPUS];
0370 int semid[NR_CPUS];
0371
0372 int create_sem(int cpu)
0373 {
0374 union semun arg;
0375 char fn[MAX_FN_SIZE];
0376 int sid;
0377
0378 sprintf(fn, PATH_FORMAT, cpu);
0379 sprintf(fn, "%s/%s", fn, "err_type_info");
0380 if ((key[cpu] = ftok(fn, 'e')) == -1) {
0381 perror("ftok");
0382 return -1;
0383 }
0384
0385 if (semid[cpu]!=0)
0386 return 0;
0387
0388 /* clear old semaphore */
0389 if ((sid = semget(key[cpu], 1, 0)) != -1)
0390 semctl(sid, 0, IPC_RMID);
0391
0392 /* get one semaphore */
0393 if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
0394 perror("semget");
0395 printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
0396 (u64)key[cpu]);
0397 return -1;
0398 }
0399
0400 vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
0401 (u64)key[cpu]);
0402 /* initialize the semaphore to 1: */
0403 arg.val = 1;
0404 if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
0405 perror("semctl");
0406 return -1;
0407 }
0408
0409 return 0;
0410 }
0411
0412 static int lock(int cpu)
0413 {
0414 struct sembuf lock;
0415
0416 lock.sem_num = cpu;
0417 lock.sem_op = 1;
0418 semop(semid[cpu], &lock, 1);
0419
0420 return 0;
0421 }
0422
0423 static int unlock(int cpu)
0424 {
0425 struct sembuf unlock;
0426
0427 unlock.sem_num = cpu;
0428 unlock.sem_op = -1;
0429 semop(semid[cpu], &unlock, 1);
0430
0431 return 0;
0432 }
0433
0434 void free_sem(int cpu)
0435 {
0436 semctl(semid[cpu], 0, IPC_RMID);
0437 }
0438
/*
 * Write one or three words to a file as hexadecimal text ("%lx" or
 * "%lx,%lx,%lx").
 *
 * fn:   file to open for writing; it must already exist.
 * data: the word(s) to write.
 * size: number of words, 1 or 3.
 *
 * Returns the number of bytes written, or -1 if size is unsupported, the
 * file cannot be opened or the write fails.
 */
int wr_multi(char *fn, unsigned long *data, int size)
{
	/* Three words of two hex digits per byte, plus two commas and a NUL. */
	char buf[3 * (2 * sizeof(unsigned long) + 1)];
	int fd;
	int len;
	int ret;

	if (size == 1)
		len = snprintf(buf, sizeof(buf), "%lx", *data);
	else if (size == 3)
		len = snprintf(buf, sizeof(buf), "%lx,%lx,%lx",
			       data[0], data[1], data[2]);
	else {
		fprintf(stderr,"write to file with wrong size!\n");
		return -1;
	}

	/* open() reports failure with -1; 0 is a perfectly valid descriptor. */
	fd = open(fn, O_RDWR);
	if (fd < 0) {
		perror("Error:");
		return -1;
	}

	/* Send the formatted text only, not the unused rest of the buffer. */
	ret = write(fd, buf, (size_t)len);
	close(fd);
	return ret;
}
0463
/* Write a single word to fn as hexadecimal text; see wr_multi() for the result. */
int wr(char *fn, unsigned long data)
{
	unsigned long word[1];

	word[0] = data;
	return wr_multi(fn, word, 1);
}
0468
0469 int rd(char *fn, unsigned long *data)
0470 {
0471 int fd;
0472 char buf[MAX_BUF_SIZE];
0473
0474 fd=open(fn, O_RDONLY);
0475 if (fd<0) {
0476 perror("Error:");
0477 return -1;
0478 }
0479 read(fd, buf, MAX_BUF_SIZE);
0480 *data=strtoul(buf, NULL, 16);
0481 close(fd);
0482 return 0;
0483 }
0484
0485 int rd_status(char *path, int *status)
0486 {
0487 char fn[MAX_FN_SIZE];
0488 sprintf(fn, "%s/status", path);
0489 if (rd(fn, (u64*)status)<0) {
0490 perror("status reading error.\n");
0491 return -1;
0492 }
0493
0494 return 0;
0495 }
0496
0497 int rd_capabilities(char *path, u64 *capabilities)
0498 {
0499 char fn[MAX_FN_SIZE];
0500 sprintf(fn, "%s/capabilities", path);
0501 if (rd(fn, capabilities)<0) {
0502 perror("capabilities reading error.\n");
0503 return -1;
0504 }
0505
0506 return 0;
0507 }
0508
0509 int rd_all(char *path)
0510 {
0511 unsigned long err_type_info, err_struct_info, err_data_buffer;
0512 int status;
0513 unsigned long capabilities, resources;
0514 char fn[MAX_FN_SIZE];
0515
0516 sprintf(fn, "%s/err_type_info", path);
0517 if (rd(fn, &err_type_info)<0) {
0518 perror("err_type_info reading error.\n");
0519 return -1;
0520 }
0521 printf("err_type_info=%lx\n", err_type_info);
0522
0523 sprintf(fn, "%s/err_struct_info", path);
0524 if (rd(fn, &err_struct_info)<0) {
0525 perror("err_struct_info reading error.\n");
0526 return -1;
0527 }
0528 printf("err_struct_info=%lx\n", err_struct_info);
0529
0530 sprintf(fn, "%s/err_data_buffer", path);
0531 if (rd(fn, &err_data_buffer)<0) {
0532 perror("err_data_buffer reading error.\n");
0533 return -1;
0534 }
0535 printf("err_data_buffer=%lx\n", err_data_buffer);
0536
0537 sprintf(fn, "%s/status", path);
0538 if (rd("status", (u64*)&status)<0) {
0539 perror("status reading error.\n");
0540 return -1;
0541 }
0542 printf("status=%d\n", status);
0543
0544 sprintf(fn, "%s/capabilities", path);
0545 if (rd(fn,&capabilities)<0) {
0546 perror("capabilities reading error.\n");
0547 return -1;
0548 }
0549 printf("capabilities=%lx\n", capabilities);
0550
0551 sprintf(fn, "%s/resources", path);
0552 if (rd(fn, &resources)<0) {
0553 perror("resources reading error.\n");
0554 return -1;
0555 }
0556 printf("resources=%lx\n", resources);
0557
0558 return 0;
0559 }
0560
0561 int query_capabilities(char *path, err_type_info_t err_type_info,
0562 u64 *capabilities)
0563 {
0564 char fn[MAX_FN_SIZE];
0565 err_struct_info_t err_struct_info;
0566 err_data_buffer_t err_data_buffer;
0567
0568 err_struct_info.err_struct_info=0;
0569 memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
0570
0571 sprintf(fn, "%s/err_type_info", path);
0572 wr(fn, err_type_info.err_type_info);
0573 sprintf(fn, "%s/err_struct_info", path);
0574 wr(fn, 0x0);
0575 sprintf(fn, "%s/err_data_buffer", path);
0576 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
0577
0578 // Fire pal_mc_error_inject procedure.
0579 sprintf(fn, "%s/call_start", path);
0580 wr(fn, mode);
0581
0582 if (rd_capabilities(path, capabilities)<0)
0583 return -1;
0584
0585 return 0;
0586 }
0587
0588 int query_all_capabilities()
0589 {
0590 int status;
0591 err_type_info_t err_type_info;
0592 int err_sev, err_struct, struct_hier;
0593 int cap=0;
0594 u64 capabilities;
0595 char path[MAX_FN_SIZE];
0596
0597 err_type_info.err_type_info=0; // Initial
0598 err_type_info.err_type_info_u.mode=0; // Query mode;
0599 err_type_info.err_type_info_u.err_inj=0;
0600
0601 printf("All capabilities implemented in pal_mc_error_inject:\n");
0602 sprintf(path, PATH_FORMAT ,0);
0603 for (err_sev=0;err_sev<3;err_sev++)
0604 for (err_struct=0;err_struct<5;err_struct++)
0605 for (struct_hier=0;struct_hier<5;struct_hier++)
0606 {
0607 status=-1;
0608 capabilities=0;
0609 err_type_info.err_type_info_u.err_sev=err_sev;
0610 err_type_info.err_type_info_u.err_struct=err_struct;
0611 err_type_info.err_type_info_u.struct_hier=struct_hier;
0612
0613 if (query_capabilities(path, err_type_info, &capabilities)<0)
0614 continue;
0615
0616 if (rd_status(path, &status)<0)
0617 continue;
0618
0619 if (status==0) {
0620 cap=1;
0621 printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
0622 err_sev, err_struct, struct_hier);
0623 printf("capabilities 0x%lx\n", capabilities);
0624 }
0625 }
0626 if (!cap) {
0627 printf("No capabilities supported.\n");
0628 return 0;
0629 }
0630
0631 return 0;
0632 }
0633
0634 int err_inject(int cpu, char *path, err_type_info_t err_type_info,
0635 err_struct_info_t err_struct_info,
0636 err_data_buffer_t err_data_buffer)
0637 {
0638 int status;
0639 char fn[MAX_FN_SIZE];
0640
0641 log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
0642 err_type_info.err_type_info,
0643 err_struct_info.err_struct_info);
0644 log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
0645 err_data_buffer.err_data_buffer[0],
0646 err_data_buffer.err_data_buffer[1],
0647 err_data_buffer.err_data_buffer[2]);
0648 sprintf(fn, "%s/err_type_info", path);
0649 wr(fn, err_type_info.err_type_info);
0650 sprintf(fn, "%s/err_struct_info", path);
0651 wr(fn, err_struct_info.err_struct_info);
0652 sprintf(fn, "%s/err_data_buffer", path);
0653 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
0654
0655 // Fire pal_mc_error_inject procedure.
0656 sprintf(fn, "%s/call_start", path);
0657 wr(fn,mode);
0658
0659 if (rd_status(path, &status)<0) {
0660 vbprintf("fail: read status\n");
0661 return -100;
0662 }
0663
0664 if (status!=0) {
0665 log_info(cpu, "fail: status=%d\n", status);
0666 return status;
0667 }
0668
0669 return status;
0670 }
0671
0672 static int construct_data_buf(char *path, err_type_info_t err_type_info,
0673 err_struct_info_t err_struct_info,
0674 err_data_buffer_t *err_data_buffer,
0675 void *va1)
0676 {
0677 char fn[MAX_FN_SIZE];
0678 u64 virt_addr=0, phys_addr=0;
0679
0680 vbprintf("va1=%lx\n", (u64)va1);
0681 memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
0682
0683 switch (err_type_info.err_type_info_u.err_struct) {
0684 case 1: // Cache
0685 switch (err_struct_info.err_struct_info_cache.cl_id) {
0686 case 1: //Virtual addr
0687 err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
0688 break;
0689 case 2: //Phys addr
0690 sprintf(fn, "%s/virtual_to_phys", path);
0691 virt_addr=(u64)va1;
0692 if (wr(fn,virt_addr)<0)
0693 return -1;
0694 rd(fn, &phys_addr);
0695 err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
0696 break;
0697 default:
0698 printf("Not supported cl_id\n");
0699 break;
0700 }
0701 break;
0702 case 2: // TLB
0703 break;
0704 case 3: // Register file
0705 break;
0706 case 4: // Bus/system interconnect
0707 default:
0708 printf("Not supported err_struct\n");
0709 break;
0710 }
0711
0712 return 0;
0713 }
0714
0715 typedef struct {
0716 u64 cpu;
0717 u64 loop;
0718 u64 interval;
0719 u64 err_type_info;
0720 u64 err_struct_info;
0721 u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
0722 } parameters_t;
0723
0724 parameters_t line_para;
0725 int para;
0726
0727 static int empty_data_buffer(u64 *err_data_buffer)
0728 {
0729 int empty=1;
0730 int i;
0731
0732 for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
0733 if (err_data_buffer[i]!=-1)
0734 empty=0;
0735
0736 return empty;
0737 }
0738
0739 int err_inj()
0740 {
0741 err_type_info_t err_type_info;
0742 err_struct_info_t err_struct_info;
0743 err_data_buffer_t err_data_buffer;
0744 int count;
0745 FILE *fp;
0746 unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
0747 u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
0748 int num;
0749 int i;
0750 char path[MAX_FN_SIZE];
0751 parameters_t parameters[MAX_TASK_NUM]={};
0752 pid_t child_pid[MAX_TASK_NUM];
0753 time_t current_time;
0754 int status;
0755
0756 if (!para) {
0757 fp=fopen("err.conf", "r");
0758 if (fp==NULL) {
0759 perror("Error open err.conf");
0760 return -1;
0761 }
0762
0763 num=0;
0764 while (!feof(fp)) {
0765 char buf[256];
0766 memset(buf,0,256);
0767 fgets(buf, 256, fp);
0768 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
0769 &cpu, &loop, &interval,&err_type_info_conf,
0770 &err_struct_info_conf,
0771 &err_data_buffer_conf[0],
0772 &err_data_buffer_conf[1],
0773 &err_data_buffer_conf[2]);
0774 if (count!=PARA_FIELD_NUM+3) {
0775 err_data_buffer_conf[0]=-1;
0776 err_data_buffer_conf[1]=-1;
0777 err_data_buffer_conf[2]=-1;
0778 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
0779 &cpu, &loop, &interval,&err_type_info_conf,
0780 &err_struct_info_conf);
0781 if (count!=PARA_FIELD_NUM)
0782 continue;
0783 }
0784
0785 parameters[num].cpu=cpu;
0786 parameters[num].loop=loop;
0787 parameters[num].interval= interval>MIN_INTERVAL
0788 ?interval:MIN_INTERVAL;
0789 parameters[num].err_type_info=err_type_info_conf;
0790 parameters[num].err_struct_info=err_struct_info_conf;
0791 memcpy(parameters[num++].err_data_buffer,
0792 err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
0793
0794 if (num>=MAX_TASK_NUM)
0795 break;
0796 }
0797 }
0798 else {
0799 parameters[0].cpu=line_para.cpu;
0800 parameters[0].loop=line_para.loop;
0801 parameters[0].interval= line_para.interval>MIN_INTERVAL
0802 ?line_para.interval:MIN_INTERVAL;
0803 parameters[0].err_type_info=line_para.err_type_info;
0804 parameters[0].err_struct_info=line_para.err_struct_info;
0805 memcpy(parameters[0].err_data_buffer,
0806 line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
0807
0808 num=1;
0809 }
0810
0811 /* Create semaphore: If one_lock, one semaphore for all processors.
0812 Otherwise, one semaphore for each processor. */
0813 if (one_lock) {
0814 if (create_sem(0)) {
0815 printf("Can not create semaphore...exit\n");
0816 free_sem(0);
0817 return -1;
0818 }
0819 }
0820 else {
0821 for (i=0;i<num;i++) {
0822 if (create_sem(parameters[i].cpu)) {
0823 printf("Can not create semaphore for cpu%d...exit\n",i);
0824 free_sem(parameters[num].cpu);
0825 return -1;
0826 }
0827 }
0828 }
0829
0830 /* Create a shm segment which will be used to inject/consume errors on.*/
0831 if (create_shm()==-1) {
0832 printf("Error to create shm...exit\n");
0833 return -1;
0834 }
0835
0836 for (i=0;i<num;i++) {
0837 pid_t pid;
0838
0839 current_time=time(NULL);
0840 log_info(parameters[i].cpu, "\nBegine at %s", ctime(¤t_time));
0841 log_info(parameters[i].cpu, "Configurations:\n");
0842 log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
0843 parameters[i].cpu,
0844 parameters[i].loop,
0845 parameters[i].interval);
0846 log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
0847 parameters[i].err_type_info,
0848 parameters[i].err_struct_info);
0849
0850 sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
0851 err_type_info.err_type_info=parameters[i].err_type_info;
0852 err_struct_info.err_struct_info=parameters[i].err_struct_info;
0853 memcpy(err_data_buffer.err_data_buffer,
0854 parameters[i].err_data_buffer,
0855 ERR_DATA_BUFFER_SIZE*8);
0856
0857 pid=fork();
0858 if (pid==0) {
0859 unsigned long mask[MASK_SIZE];
0860 int j, k;
0861
0862 void *va1, *va2;
0863
0864 /* Allocate two memory areas va1 and va2 in shm */
0865 va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
0866 va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
0867
0868 vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
0869 memset(va1, 0x1, PAGE_SIZE);
0870 memset(va2, 0x2, PAGE_SIZE);
0871
0872 if (empty_data_buffer(err_data_buffer.err_data_buffer))
0873 /* If not specified yet, construct data buffer
0874 * with va1
0875 */
0876 construct_data_buf(path, err_type_info,
0877 err_struct_info, &err_data_buffer,va1);
0878
0879 for (j=0;j<MASK_SIZE;j++)
0880 mask[j]=0;
0881
0882 cpu=parameters[i].cpu;
0883 k = cpu%64;
0884 j = cpu/64;
0885 mask[j] = 1UL << k;
0886
0887 if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
0888 perror("Error sched_setaffinity:");
0889 return -1;
0890 }
0891
0892 for (j=0; j<parameters[i].loop; j++) {
0893 log_info(parameters[i].cpu,"Injection ");
0894 log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
0895
0896 parameters[i].cpu,j+1, parameters[i].loop);
0897
0898 /* Hold the lock */
0899 if (one_lock)
0900 lock(0);
0901 else
0902 /* Hold lock on this cpu */
0903 lock(parameters[i].cpu);
0904
0905 if ((status=err_inject(parameters[i].cpu,
0906 path, err_type_info,
0907 err_struct_info, err_data_buffer))
0908 ==0) {
0909 /* consume the error for "inject only"*/
0910 memcpy(va2, va1, PAGE_SIZE);
0911 memcpy(va1, va2, PAGE_SIZE);
0912 log_info(parameters[i].cpu,
0913 "successful\n");
0914 }
0915 else {
0916 log_info(parameters[i].cpu,"fail:");
0917 log_info(parameters[i].cpu,
0918 "status=%d\n", status);
0919 unlock(parameters[i].cpu);
0920 break;
0921 }
0922 if (one_lock)
0923 /* Release the lock */
0924 unlock(0);
0925 /* Release lock on this cpu */
0926 else
0927 unlock(parameters[i].cpu);
0928
0929 if (j < parameters[i].loop-1)
0930 sleep(parameters[i].interval);
0931 }
0932 current_time=time(NULL);
0933 log_info(parameters[i].cpu, "Done at %s", ctime(¤t_time));
0934 return 0;
0935 }
0936 else if (pid<0) {
0937 perror("Error fork:");
0938 continue;
0939 }
0940 child_pid[i]=pid;
0941 }
0942 for (i=0;i<num;i++)
0943 waitpid(child_pid[i], NULL, 0);
0944
0945 if (one_lock)
0946 free_sem(0);
0947 else
0948 for (i=0;i<num;i++)
0949 free_sem(parameters[i].cpu);
0950
0951 printf("All done.\n");
0952
0953 return 0;
0954 }
0955
/* Print the usage text of the tool on stdout. */
void help()
{
	static const char *const usage[] = {
		"err_inject_tool:\n",
		"\t-q: query all capabilities. default: off\n",
		"\t-m: procedure mode. 1: physical 2: virtual. default: 1\n",
		"\t-i: inject errors. default: off\n",
		"\t-l: one lock per cpu. default: one lock for all\n",
		"\t-e: error parameters:\n",
		"\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n",
		"\t\t cpu: logical cpu number the error will be inject in.\n",
		"\t\t loop: times the error will be injected.\n",
		"\t\t interval: In second. every so often one error is injected.\n",
		"\t\t err_type_info, err_struct_info: PAL parameters.\n",
		"\t\t err_data_buffer: PAL parameter. Optional. If not present,\n",
		"\t\t it's constructed by tool automatically. Be\n",
		"\t\t careful to provide err_data_buffer and make\n",
		"\t\t sure it's working with the environment.\n",
		"\t Note:no space between error parameters.\n",
		"\t default: Take error parameters from err.conf instead of command line.\n",
		"\t-v: verbose. default: off\n",
		"\t-h: help\n\n",
		"The tool will take err.conf file as ",
		"input to inject single or multiple errors ",
		"on one or multiple cpus in parallel.\n",
	};
	size_t i;

	/* None of the pieces contains a conversion, so they are emitted verbatim. */
	for (i = 0; i < sizeof(usage) / sizeof(usage[0]); i++)
		fputs(usage[i], stdout);
}
0981
0982 int main(int argc, char **argv)
0983 {
0984 char c;
0985 int do_err_inj=0;
0986 int do_query_all=0;
0987 int count;
0988 u32 m;
0989
0990 /* Default one lock for all cpu's */
0991 one_lock=1;
0992 while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
0993 switch (c) {
0994 case 'm': /* Procedure mode. 1: phys 2: virt */
0995 count=sscanf(optarg, "%x", &m);
0996 if (count!=1 || (m!=1 && m!=2)) {
0997 printf("Wrong mode number.\n");
0998 help();
0999 return -1;
1000 }
1001 mode=m;
1002 break;
1003 case 'i': /* Inject errors */
1004 do_err_inj=1;
1005 break;
1006 case 'q': /* Query */
1007 do_query_all=1;
1008 break;
1009 case 'v': /* Verbose */
1010 verbose=1;
1011 break;
1012 case 'l': /* One lock per cpu */
1013 one_lock=0;
1014 break;
1015 case 'e': /* error arguments */
1016 /* Take parameters:
1017 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
1018 * err_data_buffer is optional. Recommend not to specify
1019 * err_data_buffer. Better to use tool to generate it.
1020 */
1021 count=sscanf(optarg,
1022 "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
1023 &line_para.cpu,
1024 &line_para.loop,
1025 &line_para.interval,
1026 &line_para.err_type_info,
1027 &line_para.err_struct_info,
1028 &line_para.err_data_buffer[0],
1029 &line_para.err_data_buffer[1],
1030 &line_para.err_data_buffer[2]);
1031 if (count!=PARA_FIELD_NUM+3) {
1032 line_para.err_data_buffer[0]=-1,
1033 line_para.err_data_buffer[1]=-1,
1034 line_para.err_data_buffer[2]=-1;
1035 count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
1036 &line_para.cpu,
1037 &line_para.loop,
1038 &line_para.interval,
1039 &line_para.err_type_info,
1040 &line_para.err_struct_info);
1041 if (count!=PARA_FIELD_NUM) {
1042 printf("Wrong error arguments.\n");
1043 help();
1044 return -1;
1045 }
1046 }
1047 para=1;
1048 break;
1049 continue;
1050 break;
1051 case 'h':
1052 help();
1053 return 0;
1054 default:
1055 break;
1056 }
1057
1058 if (do_query_all)
1059 query_all_capabilities();
1060 if (do_err_inj)
1061 err_inj();
1062
1063 if (!do_query_all && !do_err_inj)
1064 help();
1065
1066 return 0;
1067 }